ADVANCE CAR PRICE PREDICTION
  • Home
  • Prediction
  • Code
  • Contact
  • WebScraping.py
  • DataSpliting.py
  • Train.py
  • Test.py
  • TrainModel.py
In [1]:
import pandas as pd
import numpy as np
import missingno as ms
import matplotlib.pyplot as plt
import copy
import re
import seaborn as sns
import string 


import warnings
warnings.filterwarnings("ignore")
In [2]:
#Read all the training dataframes
Maruti=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Maruti.csv")
Volvo=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Volvo.csv")
Volkswagen=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Volkswagen.csv")
Toyota=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Toyota.csv")
Tata=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Tata.csv")
Skoda=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Skoda.csv")
Rolls_Royce=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Rolls-Royce.csv")
Renault=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Renault.csv")
Porsche=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Porsche.csv")
Nissan=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Nissan.csv")
Mitsubishi=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Mitsubishi.csv")
Mini=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Mini.csv")
MG=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//MG.csv")
Mercedes_Benz=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Mercedes-Benz.csv")
Maserati=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Maserati.csv")
Aston_Martin=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Aston_Martin.csv")
Mahindra=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Mahindra.csv")
Lexus=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Lexus.csv")
Land_Rover=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Land_Rover.csv")
Lamborghini=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Lamborghini.csv")
Kia=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Kia.csv")
Jeep=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Jeep.csv")
Jaguar=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Maruti.csv")
Isuzu=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Isuzu.csv")
Hyundai=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Hyundai.csv")
Honda=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Honda.csv")
Ferrari=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Ferrari.csv")
Force=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Force.csv")
Bugatti=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Bugatti.csv")
BMW=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//BMW.csv")
Bentley=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Bentley.csv")
Audi=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Audi.csv")
Ford=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Ford.csv")
In [3]:
#combine all the dataframes
main_data=pd.concat([Maruti,Ford,Audi,Bentley,BMW,Bugatti,Force,Ferrari,Honda,Hyundai,Isuzu,Jaguar,Jeep,Kia,Lamborghini,Land_Rover,Lexus,Mahindra,Aston_Martin,Maserati,Mercedes_Benz,MG,Mini,Mitsubishi,Nissan,Porsche,Renault,Rolls_Royce,Skoda,Tata,Toyota,Volkswagen,Volvo],axis=0)
In [7]:
main_data.head(5)
Out[7]:
Unnamed: 0 Unnamed: 0.1 Model Brand Varient ARAI Mileage Engine Displacement (cc) Max Power (bhp@rpm) Seating Capacity Boot Space (Litres) ... Ground Clearance Unladen (mm) Electric Fuel Tank Capacity (Litres) Motor Power Diesel Mileage (WLTP) Rear Legroom (mm) Front Seat Base Length Rear Seat Base Length City driveability (20-50kmph) Petrol Overall Mileage Acceleration 0-60kmph
0 5728 0 Maruti Dzire Dzire ZXI Plus 23.26 kmpl 1197.0 88.50bhp@6000rpm 5.0 378.0 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 13973 0 Maruti Eeco Eeco 5 Seater AC 16.11 kmpl 1196.0 72.41bhp@6000rpm 5.0 NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2 15607 0 Maruti S-Cross S-Cross Zeta 18.55 kmpl 1462.0 103.25bhp@6000rpm 5.0 375.0 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3 5199 0 Maruti Dzire Dzire ZXI Plus AT 24.12 kmpl 1197.0 88.50bhp@6000rpm 5.0 378.0 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4 13671 0 Maruti S-Presso S-Presso LXi 21.4 kmpl 998.0 65.71bhp@5500rpm 5.0 NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

5 rows × 299 columns

In [5]:
main_data.shape
Out[5]:
(160031, 299)
In [6]:
main_data.columns
Out[6]:
Index(['Unnamed: 0', 'Unnamed: 0.1', 'Model', 'Brand', 'Varient',
        'ARAI Mileage', 'Engine Displacement (cc)', 'Max Power (bhp@rpm)',
        'Seating Capacity', 'Boot Space (Litres)',
        ...
        'Ground Clearance Unladen (mm)', 'Electric Fuel Tank Capacity (Litres)',
        'Motor Power', 'Diesel Mileage (WLTP)', 'Rear Legroom (mm)',
        'Front Seat Base Length', 'Rear Seat Base Length',
        'City driveability (20-50kmph)', 'Petrol Overall Mileage',
        'Acceleration 0-60kmph'],
      dtype='object', length=299)
In [8]:
main_data.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 160031 entries, 0 to 549
Columns: 299 entries, Unnamed: 0 to Acceleration 0-60kmph
dtypes: float64(33), int64(2), object(264)
memory usage: 366.3+ MB
In [9]:
pd.set_option("display.max_rows",4)
# pd.set_option("display.max_columns",None)
In [10]:
main_data.describe()
Out[10]:
Unnamed: 0 Unnamed: 0.1 Engine Displacement (cc) Seating Capacity No. of cylinder Fuel Tank Capacity Displacement (cc) Valves Per Cylinder Petrol Mileage (ARAI) Petrol Fuel Tank Capacity (Litres) ... Front Headroom (mm) Petrol Mileage (WLTP) Front Legroom Diesel Overall Mileage Petrol City Mileage Ground Clearance Unladen (mm) Electric Fuel Tank Capacity (Litres) Diesel Mileage (WLTP) Rear Legroom (mm) Petrol Overall Mileage
count 160031.000000 160031.000000 148820.000000 153784.00000 156553.000000 140439.000000 156553.000000 156553.000000 89098.000000 100865.00000 ... 1310.000000 2599.000000 462.000000 92.00 279.000000 14544.000000 192.000000 95.00 923.000000 846.000000
mean 10307.968756 1764.852685 1697.936447 5.15088 4.070832 48.566559 1745.041181 3.857428 17.771625 47.70058 ... 1034.641985 8.474282 441.246753 17.09 16.102366 193.647071 39.947917 6.99 406.990249 13.836525
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
75% 15364.500000 0.000000 1956.000000 5.00000 4.000000 52.000000 1991.000000 4.000000 20.270000 50.00000 ... 1045.000000 10.600000 348.000000 17.09 20.240000 209.000000 45.000000 6.99 360.000000 17.000000
max 49265.000000 24149.000000 7993.000000 8.00000 16.000000 100.000000 7993.000000 8.000000 39.530000 100.00000 ... 1059.000000 16.390000 944.000000 17.09 20.240000 238.000000 45.000000 6.99 911.000000 29.400000

8 rows × 35 columns

Preprocessing¶

In [11]:
main=copy.deepcopy(main_data)
In [12]:
def calculate_missing_percentage(df):
    percentages=[]
    feature=[]
    for i in df.columns:
        no_of_missing=df["{}".format(i)].isnull().sum()
        precent=(no_of_missing/df.shape[0])
        percentages.append(precent)
        feature.append(i)
    return pd.DataFrame({
        "Fearure":feature,
        "Percentage":percentages
    })
In [13]:
def remove_higher_percentage_missingcolumns(df,columns_name,threshold):
    no_of_missing=df["{}".format(columns_name)].isnull().sum()
    percent=(no_of_missing/df.shape[0])
    if(percent>=threshold):
        df.drop(["{}".format(columns_name)],axis=1,inplace=True)
    return "Successfully applied"
In [14]:
calculate_missing_percentage(main)
Out[14]:
Fearure Percentage
0 Unnamed: 0 0.000000
1 Unnamed: 0.1 0.000000
... ... ...
297 Petrol Overall Mileage 0.994714
298 Acceleration 0-60kmph 0.997625

299 rows × 2 columns

In [15]:
pd.Series(main.columns).apply(lambda x:remove_higher_percentage_missingcolumns(main,x,0.50))
Out[15]:
0      Successfully applied
1      Successfully applied
                ...         
297    Successfully applied
298    Successfully applied
Length: 299, dtype: object
In [16]:
main.columns
Out[16]:
Index(['Unnamed: 0', 'Unnamed: 0.1', 'Model', 'Brand', 'Varient',
        'ARAI Mileage', 'Engine Displacement (cc)', 'Max Power (bhp@rpm)',
        'Seating Capacity', 'Boot Space (Litres)',
        ...
        'Cup Holders-Front', 'Leather Seats', 'Driving Experience Control Eco',
        'Ventilated Seats', 'Tyre Pressure Monitor', 'Rain Sensing Wiper',
        'Turbo Charger', 'Air Quality Control', 'Traction Control',
        'Vehicle Stability Control System'],
      dtype='object', length=153)
In [17]:
main.drop(["Unnamed: 0","Unnamed: 0.1"],axis=1,inplace=True)
In [18]:
def target_preprocess(value):
    split_values=value.split("+")
    total=0
    for i in split_values:
        try:
            price=int("".join(re.findall("[0-9]",i)))
            total=total+price
        except:
            pass
    return total
In [12]:
#start with Target columns
main["Price"]
Out[12]:
0                 9,69,443*
1       5,58,668*+Rs.11,579
2      11,51,181*+Rs.40,166
3            10,63,940*+nan
4             5,74,208*+nan
                ...         
545              58,80,255*
546              53,01,124*
547              73,34,649*
548          53,01,124*+nan
549          53,01,124*+nan
Name: Price, Length: 160031, dtype: object
In [13]:
main["Price"].unique()
Out[13]:
array(['9,69,443*', '5,58,668*+Rs.11,579', '11,51,181*+Rs.40,166', ...,
        '1,19,03,278*', '51,12,533*+nan', '1,09,26,269*'], dtype=object)
In [14]:
main["Price"]=main["Price"].apply(target_preprocess)
In [15]:
main["Price"]
Out[15]:
0       969443
1       570247
2      1191347
3      1063940
4       574208
        ...   
545    5880255
546    5301124
547    7334649
548    5301124
549    5301124
Name: Price, Length: 160031, dtype: int64
In [16]:
#Take Next columns
main["Model"].unique()
Out[16]:
array(['Maruti', 'Ford', 'Audi', 'Bentley', 'BMW', 'Bugatti', 'Force',
        'Ferrari', 'Honda', 'Hyundai', 'Isuzu', 'Jeep', 'Kia',
        'Lamborghini', 'Land_Rover', 'Lexus', 'Mahindra', 'Aston_Martin',
        'Maserati', 'Mercedes-Benz', 'MG', 'Mini', 'Mitsubishi', 'Nissan',
        'Porsche', 'Renault', 'Rolls-Royce', 'Skoda', 'Tata', 'Toyota',
        'Volkswagen', 'Volvo'], dtype=object)
In [17]:
plt.figure(figsize=(30,10))
sns.countplot(main["Model"])
Out[17]:
<AxesSubplot:xlabel='Model', ylabel='count'>
In [18]:
main["Brand"].unique()
Out[18]:
array(['Dzire', 'Eeco', 'S-Cross', 'S-Presso', 'Celerio', 'Ciaz', 'Swift',
        'Ignis', 'Alto 800', 'XL6', 'Baleno', 'Swift Dzire Tour',
        'Super Carry', 'Ertiga', 'Alto K10', 'Brezza', 'Wagon R',
        'Alto 800 tour', 'Ecosport 2015-2021', 'Figo 2010 2012', 'Fusion',
        'Ikon', 'Figo', 'Fiesta 2008-2011', 'Endeavour 2015-2020',
        'Aspire', 'Figo 2012-2015', 'Fiesta 2011-2013', 'Fiesta 2004-2008',
        'Figo 2015-2019', 'Freestyle', 'Mustang', 'Endeavour',
        'Endeavour 2014-2015', 'EcoSport', 'Escort', 'Fiesta',
        'Endeavour 2009-2014', 'Falcon', 'Endeavour 2007-2009',
        'Mondeo 2001-2006', 'Endeavour 2003-2007', 'Q7', 'RS e-tron GT',
        'Q2', 'Q3', 'A6', 'RS5', 'A4', 'e-tron', 'A8L', 'Q5', 'RS Q8',
        'Q8', 'e-tron GT', 'RS7', 'S5 Sportback', 'Continental',
        'Bentayga', 'Flying Spur', 'X5', 'M2', 'Z4', 'X1', '3 Series',
        '2 Series', '6 Series', 'X7', 'iX', 'X3', 'M5', '7 Series',
        '8 Series', '5 Series', 'X5 M', 'i4', 'X6', 'X3 M',
        'M4 Competition', 'Divo', 'Veyron', 'Gurkha', 'Portofino',
        '296 GTB', 'F8 Tributo', 'Roma', '812', 'SF90 Stradale', 'Jazz',
        'City', 'WR-V', 'Amaze', 'City Hybrid', 'City 4th Generation',
        'Creta', 'Aura', 'Santro', 'Venue', 'i20', 'Venue N Line',
        'Tucson', 'Alcazar', 'Grand i10 Nios', 'Verna', 'i20 N Line',
        'MU-X', 'D-Max', 'Compass', 'Meridian', 'Wrangler',
        'Compass Trailhawk', 'Carens', 'Seltos', 'Sonet', 'Carnival',
        'EV6', 'Huracan EVO', 'Aventador', 'Urus', 'Defender', 'Discovery',
        'Discovery Sport', 'NX', 'LC 500h', 'LS', 'RX', 'ES', 'LX',
        'XUV300', 'Scorpio-N', 'Alturas G4', 'Bolero', 'XUV700',
        'KUV 100 NXT', 'Thar', 'Bolero Neo', 'Bolero Camper', 'E Verito',
        'Scorpio Classic', 'Marazzo', 'Vantage', 'DB11', 'DBX', 'Levante',
        'Quattroporte', 'Ghibli', 'GranCabrio', 'GLE', 'E-Class',
        'AMG A 45 S', 'EQC', 'AMG C 63', 'C-Class', 'Maybach S-Class',
        'GLC Coupe', 'AMG E 63', 'AMG GLE 63 S', 'AMG A 35', 'AMG E 53',
        'A-Class Limousine', 'AMG GLA 35', 'S-Class', 'AMG GLE 53',
        'AMG G 63', 'GLS', 'V-Class', 'CLS', 'AMG C 43', 'GLC', 'AMG GT',
        'AMG GLC 43', 'G-Class', 'GLA', 'AMG GT 4-Door Coupe', 'EQS',
        'Hector', 'Hector Plus', 'Gloster', 'Astor', 'ZS EV',
        'Cooper 3 DOOR', 'John cooper Works', 'Cooper Convertible',
        'Cooper SE', 'Cooper Countryman', 'Lancer', 'Outlander 2007-2013',
        'Pajero 2002-2012', 'Montero 2007-2012', 'Cedia', 'FTO',
        'Lancer Evolution X', 'Montero 2009-2014', 'Challenger',
        'Pajero Sport', 'Outlander', 'Montero', 'Magnite', 'Kicks', 'GT-R',
        'Panamera', '911', 'Taycan', 'Cayenne Coupe', '718', 'Cayenne',
        'Macan', 'Kiger', 'Triber', 'KWID', 'Phantom', 'Ghost',
        'Rolls Royce Dawn', 'Cullinan', 'Slavia', 'Kodiaq', 'Kushaq',
        'Octavia', 'Superb', 'Harrier', 'Nexon', 'Tiago', 'Punch',
        'Altroz', 'Tiago NRG', 'Safari', 'Nexon EV Max', 'Tigor EV',
        'Yodha Pickup', 'Tigor', 'Nexon EV Prime', 'Fortuner', 'Hilux',
        'Innova Crysta', 'Glanza', 'Urban Cruiser Hyryder', 'Camry',
        'Vellfire', 'Taigun', 'Virtus', 'Vento', 'Tiguan', 'XC90', 'S90',
        'S60', 'XC40', 'XC60', 'XC40 Recharge'], dtype=object)
In [19]:
main["Varient"].unique()
Out[19]:
array(['Dzire ZXI Plus', 'Eeco 5 Seater AC', 'S-Cross Zeta', ...,
        'XC40 B4 Ultimate', 'XC60 B5 Inscripition', 'XC40 Recharge P8 AWD'],
      dtype=object)
In [4]:
def remove_brand_name(brand,varient):
    var=varient.replace(brand,"")
    if var[0]==" ":
        var=var[1:]
    return var
In [5]:
main["Varient"]=main.apply(lambda x:remove_brand_name(x["Brand"],x["Varient"]),axis=1)
In [6]:
main["Varient"].unique()
Out[6]:
array(['220d M Sport', '220i Sport', '220i M Sport', ...,
        'N8 iMT Dual tone', 'eDrive40', 'xDrive40'], dtype=object)
In [20]:
def ARAI_Mileage_preprocess(text):
    text=str(text)
    if text=="nan":
        return np.nan
    varient="".join(re.findall("[a-zA-Z\/]",text))
    if varient=="kmpl":
        text=float(text.replace(varient,""))
    elif varient=="km/kg":
        text=float(text.replace(varient,""))*1.40
    return text
In [21]:
#This feature is in object type we need to convert it into float
main["ARAI Mileage"]
Out[21]:
0      23.26 kmpl
1      16.11 kmpl
2      18.55 kmpl
3      24.12 kmpl
4       21.4 kmpl
          ...    
545           NaN
546           NaN
547           NaN
548           NaN
549           NaN
Name: ARAI Mileage, Length: 160031, dtype: object
In [22]:
main["ARAI Mileage"].unique()
Out[22]:
array(['23.26 kmpl', '16.11 kmpl', '18.55 kmpl', '24.12 kmpl',
        '21.4 kmpl', '26.0 kmpl', '20.04 kmpl', '23.76 kmpl', '20.89 kmpl',
        '31.59 km/kg', '20.97 kmpl', '22.94 kmpl', '26.55 km/kg', nan,
        '23.2 kmpl', '26.11 km/kg', '22.35 kmpl', '19.95 kmpl',
        '18.43 kmpl', '20.3 kmpl', '25.24 kmpl', '20.51 kmpl',
        '24.39 kmpl', '21.7 kmpl', '19.89 kmpl', '34.05 km/kg',
        '20.65 kmpl', '35.6 km/kg', '20.27 kmpl', '22.05 kmpl',
        '19.8 kmpl', '30.9 km/kg', '20.88 km/kg', '24.97 kmpl',
        '23.56 kmpl', '24.35 kmpl', '24.43 kmpl', '26.68 kmpl',
        '20.15 kmpl', '25.19 kmpl', '24.9 kmpl', '23.0 kmpl', '15.6 kmpl',
        '17.7 kmpl', '14.2 kmpl', '18.5 kmpl', '20.0 kmpl', '15.3 kmpl',
        '13.8 kmpl', '26.1 kmpl', '23.5 kmpl', '25.83 kmpl', '16.6 kmpl',
        '20.4 kmpl', '17.8 kmpl', '13.6 kmpl', '24.29 kmpl', '15.9 kmpl',
        '19.0 kmpl', '13.0 kmpl', '13.9 kmpl', '23.8 kmpl', '13.1 kmpl',
        '25.5 kmpl', '11.4 kmpl', '18.16 kmpl', '14.2 km/kg', '24.4 kmpl',
        '17.0 kmpl', '12.8 kmpl', '25.01 kmpl', '13.5 kmpl', '16.86 kmpl',
        '14.0 kmpl', '12.62 kmpl', '18.12 kmpl', '20.4 km/kg', '10.9 kmpl',
        '17.01 kmpl', '9.0 kmpl', '22.77 kmpl', '19.4 kmpl', '18.1 kmpl',
        '16.0 kmpl', '12.4 kmpl', '16.97 kmpl', '16.3 kmpl', '14.7 kmpl',
        '10.91 kmpl', '11.21 kmpl', '14.11 kmpl', '8.8 kmpl', '13.47 kmpl',
        '9.8 kmpl', '8.9 kmpl', '12.5 kmpl', '12.9 kmpl', '10.2 kmpl',
        '10.1 kmpl', '11.24 kmpl', '10.63 kmpl', '14.37 kmpl',
        '19.62 kmpl', '14.82 kmpl', '16.13 kmpl', '20.37 kmpl',
        '13.32 kmpl', '13.38 kmpl', '11.86 kmpl', '12.04 kmpl',
        '13.17 kmpl', '11.29 kmpl', '9.12 kmpl', '5.59 kmpl', '17.42 kmpl',
        '18.65 kmpl', '39.53 kmpl', '8.29 kmpl', '18.64 kmpl',
        '10.31 kmpl', '11.3 kmpl', '10.54 kmpl', '17.09 kmpl',
        '16.55 kmpl', '17.66 kmpl', '6.8 kmpl', '17.1 kmpl', '18.4 kmpl',
        '23.7 kmpl', '24.7 kmpl', '18.6 kmpl', '16.5 kmpl', '26.5 kmpl',
        '17.4 kmpl', '24.1 kmpl', '20.1 kmpl', '25.0 kmpl', '21.0 kmpl',
        '20.5 kmpl', '18.45 kmpl', '20.28 kmpl', '14.5 kmpl', '19.65 kmpl',
        '20.7 kmpl', '16.8 kmpl', '21.3 kmpl', '20.25 kmpl', '30.48 km/kg',
        '28.0 km/kg', '19.2 kmpl', '12.31 kmpl', '16.56 kmpl', '14.1 kmpl',
        '15.7 kmpl', '16.2 kmpl', '12.1 kmpl', '14.3 kmpl', '14.9 kmpl',
        '17.3 kmpl', '18.3 kmpl', '18.2 kmpl', '20.8 kmpl', '18.0 kmpl',
        '7.25 kmpl', '7.04 kmpl', '7.69 kmpl', '7.87 kmpl', '7.3 kmpl',
        '12.3 kmpl', '15.4 kmpl', '18.8 kmpl', '22.37 kmpl', '6.9 kmpl',
        '12.05 kmpl', '18.15 kmpl', '17.29 kmpl', '15.2 kmpl', '12.0 kmpl',
        '11.76 kmpl', '9.7 kmpl', '16.34 kmpl', '8.62 kmpl', '8.26 kmpl',
        '16.1 kmpl', '16.9 kmpl', '15.0 kmpl', '12.74 kmpl', '12.65 kmpl',
        '8.13 kmpl', '14.025 kmpl', '16.65 kmpl', '17.33 kmpl',
        '15.81 kmpl', '16.72 kmpl', '14.34 kmpl', '13.7 kmpl', '9.5 kmpl',
        '12.25 kmpl', '14.7 km/kg', '14.8 kmpl', '10.5 kmpl', '11.56 kmpl',
        '18.75 kmpl', '14.23 kmpl', '10.75 kmpl', '9.17 kmpl',
        '16.12 kmpl', '19.03 kmpl', '19.17 kmpl', '22.02 kmpl',
        '22.25 kmpl', '18.24 kmpl', '22.0 kmpl', '18.72 kmpl',
        '12.78 kmpl', '18.07 kmpl', '15.78 kmpl', '17.95 kmpl',
        '19.47 kmpl', '17.88 kmpl', '18.41 kmpl', '15.1 kmpl', '17.2 kmpl',
        '14.6 kmpl', '17.57 kmpl', '20.09 kmpl', '18.82 kmpl',
        '18.97 kmpl', '18.53 kmpl', '14.08 kmpl', '16.35 kmpl',
        '16.14 kmpl', '22.07 kmpl', '21.19 kmpl', '23.03 kmpl',
        '26.49 km/kg', '18.13 kmpl', '19.27 kmpl', '8.0 kmpl',
        '27.97 kmpl', '10.0 kmpl', '20.58 kmpl', '17.23 kmpl',
        '18.47 kmpl', '18.67 kmpl', '17.69 kmpl', '36.0 kmpl', '11.2 kmpl'],
      dtype=object)
In [23]:
main["ARAI Mileage"]=pd.Series(main["ARAI Mileage"]).apply(ARAI_Mileage_preprocess)
In [24]:
main.rename(columns={"ARAI Mileage":"ARAI Mileage(Km/L)"},inplace=True)
In [25]:
main["ARAI Mileage(Km/L)"]
Out[25]:
0      23.26
1      16.11
2      18.55
3      24.12
4      21.40
        ...  
545      NaN
546      NaN
547      NaN
548      NaN
549      NaN
Name: ARAI Mileage(Km/L), Length: 160031, dtype: float64
In [26]:
main["Engine Displacement (cc)"]
Out[26]:
0      1197.0
1      1196.0
2      1462.0
3      1197.0
4       998.0
        ...  
545       NaN
546    1969.0
547    1969.0
548    1969.0
549    1969.0
Name: Engine Displacement (cc), Length: 160031, dtype: float64
In [27]:
main["Max Power (bhp@rpm)"]
Out[27]:
0       88.50bhp@6000rpm
1       72.41bhp@6000rpm
2      103.25bhp@6000rpm
3       88.50bhp@6000rpm
4       65.71bhp@5500rpm
              ...        
545            402.30bhp
546                  NaN
547            246.58Bhp
548                  NaN
549                  NaN
Name: Max Power (bhp@rpm), Length: 160031, dtype: object
In [127]:
main["Max Power (bhp@rpm)"].unique()
Out[127]:
array(['88.50bhp@6000rpm', '72.41bhp@6000rpm', '103.25bhp@6000rpm',
        '65.71bhp@5500rpm', '81.80bhp@6000rpm', '40.36bhp@6000rpm',
        '101.65bhp@6000rpm', '70.40bhp@6000rpm', '86.63bhp@5500rpm',
        '55.92bhp@5300rpm', nan, '47.33bhp@6000rpm', '76.43bhp@6000rpm',
        '61.68bhp@6000rpm', '98.96bhp@3750rpm', '70bhp@6250rpm',
        '68 @ 4,000 (PS@rpm)', '92@5500(PS@rpm)', '94.93bhp@6500rpm',
        '68bhp@4000rpm', '101 @ 6,500 (PS@rpm)', '158.2bhp@3200rpm',
        '70 @ 5,500 (PS@rpm)', '99.23bhp@3750rpm', '68.05bhp@4000rpm',
        '90ps @ 3750rpm', '99bhp@3750rpm', '121bhp@6500rpm',
        '70.02bhp@6250rpm', '120.69bhp@6500rpm', '94.68bhp@6500rpm',
        '395bhp@6500+-50rpm', '167.62bhp@3500rpm', '141bhp@3500rpm',
        '153.86bhp@3200rpm', '86.8bhp@6300rpm', '98.63bhp@3750rpm',
        '109PS @ 6450rpm', '89.75bhp@3750rpm', '157.7bhp@3200rpm',
        '88.7bhp@3750rpm', '107.5bhp@6045rpm', '98.59bhp@3750rpm',
        '158bhp@3200rpm', '94.89bhp@6500rpm', '197bhp@3000rpm',
        '110.4bhp@6300rpm', '110.5bhp@6300rpm', '123.24bhp@6000rpm',
        '95.48bhp@6500rpm', '153.8bhp@3200rpm', '143 @ 3,500 (PS@rpm)',
        '197.2bhp@3000rpm', '335.25bhp@5200-6400rpm', '636.98bhp',
        '187.74bhp@4200-6000rpm', '187.74bhp@1500-4100rpm',
        '241.3bhp@5000-6500rpm', '443.87bhp@5700-6700rpm',
        '187.74bhp@4200-6000', '230', '335.25bhp@5000-6400rpm', '300',
        '245.59bhp@5000-6000rpm', '591.39bhp@6000rpm', '522.99kw',
        '591bhp@6000-6250rpm', '348.66bhp@5400-6400rpm',
        '340bhp@5000-6400rpm', '500bhp@6000rpm', '542bhp@6000rpm',
        '562bhp@6000rpm', '626bhp@5000-6000rpm', '335.26bhp@5500-6500rpm',
        '410bhp@6250rpm', '194bhp@4500-6500rpm', '187.74bhp@5000-6000rpm',
        '257.47bhp@5000-6000rpm', '254.79bhp@5200rpm', '189.08bhp@5000rpm',
        '187.74bhp@4000rpm', '254.79bhp@5000rpm', '261.50bhp@4000rpm',
        '382.19bhp@5800rpm', '394.26bhp@4400rpm', '321.84Bhp',
        '248.08bhp@5200rpm', '335bhp@5000-6500rpm', '616.87bhp@6000rpm',
        '335.25bhp@5500-6500rpm', '600bhp@6000rpm', '261.49bhp@4000rpm',
        '281.6bhp@5000-6000rpm', '335.25bhp', '335.25bhp@5000-6500rpm',
        '473.38bhp@6250rpm', '502.88bhp@6250rpm', '261.4bhp@4000rpm',
        '187.74bhp@4200rpm', '261.49bhp', '189.08bhp@5000-6000rpm',
        '1479bhp@6700rpm', '1001bhp@6000rpm', '89.84bhp@3200rpm',
        '591.79bhp@7500rpm', '710.74bhp@8000rpm', '611.50bhp@5750-7500rpm',
        '788.52@8500rpm', '769.31@7500rpm', '119.35bhp@6600rpm',
        '97.89bhp@3600rpm', '96.55bhp@5600-6400rpm', '117.60bhp@6600rpm',
        '79.12bhp@3600rpm', '113.18bhp@6300rpm', '81.86bhp@6000rpm',
        '68.05bhp@5500rpm', '118.41bhp@6000rpm', '98.63bhp@4000rpm',
        '113.45bhp@4000rpm', '153.81bhp@6200rpm', '156.82bhp@6500rpm',
        '68.05bhp@6000rpm', '118.36bhp@6000rpm', '113.42bhp@4000rpm',
        '98.56bhp@4000rpm', '98.63bhp@6000rpm', '86.80bhp@6000rpm',
        '138.12bhp@6000rpm', '183.72bhp@4000rpm', '59.17bhp@5500rpm',
        '160.92bhp@3600rpm', '77.77bhp@3800rpm', '160.77bhp@5500rpm',
        '167.67bhp@3750rpm', '268bhp@5250rpm', '113.42bhp@6300rpm',
        '113.43bhp@6300rpm', '138.08bhp@6000rpm', '113.43bhp@4000rpm',
        '138.05bhp@6000rpm', '197.26bhp@3800rpm', '320.55bhp', '225.86bhp',
        '602.11bhp@8000rpm', '630.28bhp@8000rpm', '610bhp',
        '630.3bhp@8000rpm', '759.01bhp@8500rpm', '641bhp@6000rpm',
        '641.00bhp@6000rpm', '640bhp@8000rpm', '187.74bhp@6000rpm',
        '295.02@6600rpm', '354bhp@6600rpm', '258.81bhp@6000rpm',
        '214.56bhp@5700rpm', '362bhp@5600rpm', '108.6bhp@5000rpm',
        '178.49bhp@3800rpm', '115bhp@3750rpm', '74.96bhp@3600rpm',
        '197.13bhp@5000rpm', '82bhp@5500rpm', '150bhp@5000rpm',
        '100bhp@3750rpm', '75.09bhp@3200rpm', '108.62bhp@5000rpm',
        '182.38bhp@3500rpm', '41.57bhp@3500rpm', '130bhp@3750rpm',
        '130.07bhp@3750rpm', '120.96bhp@3500rpm', '152.87bhp@3750rpm',
        '275bhp@4000rpm', '275bhp', '350bhp@5750rpm', '430bhp@5750rpm',
        '530bhp@6800rpm', '450bhp@7000rpm', '325.8bhp@3600-4200rpm',
        '281.61bhp@3400-4600rpm', '415.71bhp@6750rpm', '402.30Bhp',
        '469.35bhp@5500-6250rpm', '261.49bhp@4200rpm',
        '603.46bhp@5250-5500', '241.38bhp@4200rpm',
        '603.46bhp@5750-6500rpm', '191.76bhp@3800rpm', '301.73bhp@5800rpm',
        '429.12bhp@6100rpm', '160.92bhp@5500rpm', '201.15bhp@5800-6800rpm',
        '194.44bhp@5500-6100rpm', '281.61bhp@3400-4600bhp',
        '435bhp@5500-6100rpm', '576.63bhp@6000rpm',
        '325.86bhp3600-4200rpm', '161bhp@3800rpm',
        '254.79bhp@5800-6100rpm', '241.3bhp@4200rpm',
        '325.8bhp@3600-4000rpm', '362.07bhp5500-6100bhp',
        '362.07bhp@5500-6100rpm', '384.87@6100rpm', '197bhp@5500-6100rpm',
        '576.63bhp@6250rpm', '384.87bhp@5500–6100rpm', '187.74bhp@3800rpm',
        '639bhp', '496.17bhp@5500-5500', '194bhp@3800rpm',
        '160.92bhp@4200rpm', '197.13bhp@3600rpm', '549.81bhp6000-6500rpm',
        '750.97bhp', '147.51bhp@1620-4000rpm', '167.68bhp@3750rpm',
        '141bhp@5000rpm', '158.79bhp@4000rpm', '212.55bhp@4000rpm',
        '138.08bhp@5600rpm', '108.49bhp@6000rpm', '167.67Bhp@3750rpm',
        '173.83bhp', '189.08bhp@4700-6000pm', '227.97', '181.03bhp',
        '85.8bhp@5500rpm', '170PS @ 6000rpm', '118.6@4000 (PS@rpm)',
        '164.5 @ 3,500 (PS@rpm)', '115 @ 5,250 (PS@rpm)',
        '68@4,500 (PS@rpm)', '107.2bhp@4000rpm', '290bhp@6500rpm',
        '199.3bhp@3800rpm', '114bhp@5250rpm', '175.56bhp@4000rpm',
        '164.94bhp@6000rpm', '191.3bhp@3800rpm', '178bhp@4000rpm',
        '98.63bhp@5000rpm', '71.02bhp@6250rpm', '153.87bhp@5500rpm',
        '562.20bhp@6800rpm', '104.55bhp@5600rpm', '325.48bhp@5400–6400rpm',
        '379.50bhp@6500', '502.88bhp@8400rpm', '482.76bhp',
        '443.87bhp@6500', '542.4bhp@5750-6000rpm', '616.87bhp',
        '295bhp@6500rpm', '680bhp@5750-6000rpm', '321.84bhp',
        '340bhp@5300-6400rpm', '414.37bhp@7600rpm',
        '434.49bhp@5700-6600rpm', '493.49bhp@8400rpm', '394.26bhp@7000rpm',
        '641.00bhp@6500', '335bhp@5300-6400rpm', '631.62bhp@6000rpm',
        '261.49bhp@5000-6500rpm', '375.48bhp@5200-6700rpm',
        '453.26bhp@6000–6500rpm', '541.773bhp@5750-6000rpm',
        '550bhp@5750-6000rpm', '450hp@6500', '71.01bhp@6250rpm',
        '53.26bhp@5600rpm', '67.06bhp@5500rpm', '563bhp@5000rpm',
        '563bhp@5250rpm', '563bhp@5250-6000rpm', '147.52bhp@5000-6000rpm',
        '113.98bhp@5000-5500rpm', '147.51bhp@5000-6000rpm',
        '187.74bhp@4180-6000rpm', '118.36bhp@5500rpm', '84.82bhp@6000rpm',
        '84.48bhp@6000rpm', '84.88bhp@6000rpm', '141.04bhp', '73.75bhp',
        '85bhp@3000rpm', '108.49bhp@4000rpm', '88.77bhp@4000rpm',
        '72.40bhp@6000rpm', '108.50bhp@5500rpm', '72bhp@6000rpm', '127bhp',
        '85.82bhp@3000rpm', '201.15bhp@3400rpm', '201.15bhp@3000-3400rpm',
        '163.60bhp@5200rpm', '91.18bhp@5500rpm', '101.64bhp@6000rpm',
        '175.67bhp@5700rpm', '115.32bhp@4700rpm', '108.62bhp@5000-5500rpm',
        '400bhp', '246.58Bhp', '190bhp', '246.74bhp@4000rpm', '402.30bhp'],
      dtype=object)
In [128]:
def bhp_rpm_preprocess(text):
    try:
        global final_df,k
        bhp=[]
        rpm=[]
        text=str(text)
        split_value=text.split("@",1)
        split_value.reverse()
        if len(split_value)==1:
            check="".join(re.findall('[a-zA-Z]',split_value[0])).lower()
            if check=="bhprpm":
                _replace=split_value[0].replace("bhp","bhp@")
                lists=_replace.split("@")
                split_value=[]
                split_value.append(lists[1])
                split_value.append(lists[0])
            elif check=="bhpbhp":
                _replace=split_value[0].replace("bhp","bhp@")
                lists=_replace.split("@")
                split_value=[]
                split_value.append(lists[1])
                split_value.append(lists[0])
        if len(split_value)==2:
            first="".join(re.findall('[a-zA-Z]',split_value[0]))
            second="".join(re.findall('[a-zA-Z]',split_value[1]))
            if first==second:
                split_value[0]=split_value[0].replace(first,"rpm")
            if first=="pm":
                split_value[0]=split_value[0].replace(first,"rpm")
            if second=="hp":
                split_value[1]=split_value[1].replace(second,"bhp")
        j=0
        Activate=False
        for i in split_value:
            if len(split_value)==2:
                check="".join(re.findall('[a-zA-Z]',i)).lower()
                if check=="psrpm":
                    try:
                        rpm.append((float(i.replace("".join(re.findall("[\sa-zA-Z\@\(\)]",i)).strip(),""))))
                        Activate=True
                    except:
                        first=i.replace("".join(re.findall("[\sa-zA-Z\@\(\)]",i)).strip(),"")
                        rpm.append(float(first.replace("".join(re.findall('\,',first)).strip(),"")))
                        Activate=True
                else:
                    if Activate==True:
                        convert_into_bhp=(float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))/1.01387)
                        bhp.append(convert_into_bhp)
                        Activate=False
                    else:
                        if check=="":
                            if j==1:
                                bhp.append((float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))))
                            elif j==0:
                                try:
                                    rpm.append((float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))))
                                except:
                                    intial=i.replace("".join(re.findall("[a-zA-Z]",i)),"")
                                    lists=intial.split("-")
                                    average_of_rpm=(float(lists[0])+float(lists[1]))/2
                                    rpm.append(average_of_rpm)
                        else:
                            if check=="kw":
                                convert_into_bhp=(float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))/0.745699872)
                                bhp.append(convert_into_bhp)
                            elif check=="bhp":
                                _bhp=float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))
                                bhp.append(_bhp)
                            elif check=="ps":
                                convert_into_bhp=(float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))/1.014)
                                bhp.append(convert_into_bhp)
                            elif check=="rpm":
                                try:
                                    _rpm=float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))
                                    rpm.append(_rpm)
                                except:
                                    try:
                                        intial=i.replace("".join(re.findall("[a-zA-Z]",i)),"")
                                        try:
                                            lists=intial.split("-")
                                            average_of_rpm=(float(lists[0])+float(lists[1]))/2
                                            rpm.append(average_of_rpm)
                                        except:
                                            lists=intial.split("–")
                                            average_of_rpm=(float(lists[0])+float(lists[1]))/2
                                            rpm.append(average_of_rpm)
                                    except:
                                        intial=i.replace("".join(re.findall("[a-zA-Z]",i)),"")
                                        lists=intial.split("+-")
                                        _first=float(lists[0])#take the average value
                                        rpm.append(_first)

            else:
                check="".join(re.findall('[a-zA-Z]',i)).lower()
                if check=="kw":
                    convert_into_bhp=(float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))/0.745699872)
                    bhp.append(convert_into_bhp)
                    rpm.append(0)
                elif check=="bhp":
                    bhp.append((float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))))
                    rpm.append(0)
                elif check=="rpm":
                    rpm.append((float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))))
                    bhp.append(0)
                elif check=="nan":
                    bhp.append(np.nan)
                    rpm.append(np.nan)
                else:
                    bhp.append((float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))))
                    rpm.append(0)
            j=j+1 

        temp=pd.DataFrame({
        "BHP":bhp,
        "RPM":rpm
        })
        final_df=pd.concat([final_df,temp],axis=0)
        print(k)
        k=k+1
    except:
        print("UnSuccessfully executed",k)
        k=k+1
In [ ]:
#This are the possible cases of the Max Power (bhp@rpm) column....
# 522.99kw
# 300 #Rpm
# 187.74bhp@4200-6000rpm
# 636.98bhp
# 153.86bhp@3200rpm
# 68 @ 4,000 (PS@rpm)
# 92@5500(PS@rpm)
# 90ps @ 3750rpm
# 395bhp@6500+-50rpm
# 34hp@2000
# 90bhp@1000pm
# 125bhp2000rpm
In [62]:
k=0
final_df=pd.DataFrame()
pd.Series(main["Max Power (bhp@rpm)"]).apply(bhp_rpm_preprocess)
main.shape
Out[62]:
(160031, 155)
In [63]:
final_df.shape
Out[63]:
(160031, 3)
In [29]:
final_df=final_df.reset_index().drop(["index"],axis=1)
In [30]:
main=main.reset_index(drop=True)
In [31]:
main=pd.concat([main,final_df],axis=1)
In [32]:
main[["Max Power (bhp@rpm)","BHP","RPM"]]
Out[32]:
Max Power (bhp@rpm) BHP RPM
0 88.50bhp@6000rpm 88.50 6000.0
1 72.41bhp@6000rpm 72.41 6000.0
2 103.25bhp@6000rpm 103.25 6000.0
3 88.50bhp@6000rpm 88.50 6000.0
4 65.71bhp@5500rpm 65.71 5500.0
... ... ... ...
160026 402.30bhp 402.30 0.0
160027 NaN NaN NaN
160028 246.58Bhp 246.58 0.0
160029 NaN NaN NaN
160030 NaN NaN NaN

160031 rows × 3 columns

In [33]:
main.iloc[155023][["Max Power (bhp@rpm)","RPM","BHP"]]
Out[33]:
Max Power (bhp@rpm)    91.18bhp@5500rpm
RPM                              5500.0
BHP                               91.18
Name: 155023, dtype: object
In [34]:
final_df.iloc[155023]
Out[34]:
Unnamed: 0    155023.00
BHP               91.18
RPM             5500.00
Name: 155023, dtype: float64
In [35]:
main.drop(["Max Power (bhp@rpm)"],axis=1,inplace=True)
In [36]:
main["Seating Capacity"]
Out[36]:
0         5.0
1         5.0
2         5.0
3         5.0
4         5.0
          ... 
160026    5.0
160027    NaN
160028    5.0
160029    NaN
160030    NaN
Name: Seating Capacity, Length: 160031, dtype: float64
In [37]:
main["Boot Space (Litres)"]
Out[37]:
0         378.0
1           NaN
2         375.0
3         378.0
4           NaN
          ...  
160026      414
160027      NaN
160028      NaN
160029      NaN
160030      NaN
Name: Boot Space (Litres), Length: 160031, dtype: object
In [38]:
main["Body Type"]
Out[38]:
0             Sedan
1           Minivan
2               SUV
3             Sedan
4         Hatchback
            ...    
160026          SUV
160027          SUV
160028        Sedan
160029          SUV
160030          SUV
Name: Body Type, Length: 160031, dtype: object
In [39]:
main["Body Type"].unique()
Out[39]:
array(['Sedan', 'Minivan', 'SUV', 'Hatchback', 'MUV', 'Pickup Truck',
        'Coupe', 'Luxury', 'Convertible', nan, 'Wagon', 'Hybrid'],
      dtype=object)
In [40]:
plt.figure(figsize=(15,10))
sns.countplot(main["Body Type"])
Out[40]:
<AxesSubplot:xlabel='Body Type', ylabel='count'>
In [41]:
main.columns
Out[41]:
Index(['Model', 'Brand', 'Varient', 'ARAI Mileage(Km/L)',
        'Engine Displacement (cc)', 'Seating Capacity', 'Boot Space (Litres)',
        'Body Type', 'Fuel Type', 'No. of cylinder',
        ...
        'Ventilated Seats', 'Tyre Pressure Monitor', 'Rain Sensing Wiper',
        'Turbo Charger', 'Air Quality Control', 'Traction Control',
        'Vehicle Stability Control System', 'Unnamed: 0', 'BHP', 'RPM'],
      dtype='object', length=153)
In [42]:
main["Fuel Type"]
Out[42]:
0           Petrol
1           Petrol
2           Petrol
3           Petrol
4           Petrol
            ...   
160026    Electric
160027      Petrol
160028      Petrol
160029      Petrol
160030      Petrol
Name: Fuel Type, Length: 160031, dtype: object
In [43]:
main["Fuel Type"].unique()
Out[43]:
array(['Petrol', 'CNG', 'Diesel', 'Electric'], dtype=object)
In [44]:
plt.figure(figsize=(15,5))
sns.countplot(main["Fuel Type"])
Out[44]:
<AxesSubplot:xlabel='Fuel Type', ylabel='count'>
In [45]:
main["No. of cylinder"]
Out[45]:
0         4.0
1         4.0
2         4.0
3         4.0
4         3.0
          ... 
160026    NaN
160027    4.0
160028    4.0
160029    4.0
160030    4.0
Name: No. of cylinder, Length: 160031, dtype: float64
In [46]:
main['Max Torque (nm@rpm)']
Out[46]:
0         113Nm@4400rpm
1          98Nm@3000rpm
2         138Nm@4400rpm
3         113Nm@4400rpm
4          89Nm@3500rpm
              ...      
160026            660Nm
160027              NaN
160028            350Nm
160029              NaN
160030              NaN
Name: Max Torque (nm@rpm), Length: 160031, dtype: object
In [102]:
main["Max Torque (nm@rpm)"].unique()
Out[102]:
array(['113Nm@4400rpm', '98Nm@3000rpm', '138Nm@4400rpm', '89Nm@3500rpm',
        '89nm@3500rpm', '113Nm@4200rpm', '60Nm@3500rpm', '136.8nm@4400rpm',
        '95nm@4000rpm', '98Nm @3000rpm', '121.5nm@4200rpm',
        '113nm@4200rpm', '82.1Nm@3400rpm', nan, '82.1nm@3400rpm',
        '69Nm@3500rpm', '98.5Nm@4300rpm', '85Nm@3000rpm', '98nm@3000rpm',
        '69nm@3500rpm', '205Nm@1750-3250rpm', '102Nm@4000rpm',
        '16 @ 2,000 (kgm@rpm)', '130@2500(kgm@rpm)', '215Nm@1750-2500rpm',
        '119Nm@4250rpm', '160Nm@2000rpm', '14.9 @ 3,400 (kgm@rpm)',
        '385Nm@1600-2500rpm', '10.7 @ 2,500 (kgm@rpm)',
        '215Nm@1750-3000rpm', '204Nm @ 2000-2750rpm',
        '16.3 @ 2,000 (kgm@rpm)', '150Nm@4500rpm',
        '14.75 @ 3,400 (kgm@rpm)', '120Nm@4250rpm', '149Nm@4500rpm',
        '515Nm@4250+-50rpm', '420Nm@2000-2500rpm', '215nm@1750-2500rpm',
        '330Nm@1800rpm', '119nm@4250rpm', '215Nm@1750-3000',
        '380Nm@2500rpm', '112Nm@4000rpm', '140Nm @ 4500rpm',
        '204Nm@2000-2750rpm', '140Nm@4500rpm', '14.8 @ 3,400 (kgm@rpm)',
        '470Nm@1750-2500rpm', '136Nm@4250rpm', '170Nm@1500-4500rpm',
        '33.7 @ 1,800 (kgm@rpm)', '500Nm@1370-4500rpm', '830Nm',
        '320nm@1500–4180rpm', '370Nm@1600-4500rpm', '600nm@1900-5000rpm',
        '320nm@1450–4200', '540', '500nm@1370-4500rpm', '664',
        '370nm@1600-4300bhp', '800nm@2200-4500rpm', '630Nm',
        '800nm@2050-4500rpm', '660Nm@1700rpm', '770 Nm@2000-4500rpm',
        '700Nm@1600rpm', '900nm@1350-4500rpm', '900Nm@1350-4500rpm',
        '450Nm@1500-5200rpm', '550Nm@2350-5230rpm', '320Nm@1450-4200rpm',
        '400nm@1750-2500rpm', '280nm@1350-4600rpm', '400Nm@1550-4400rpm',
        '400Nm@1750-2500rpm', '620Nm@1500-2500rpm', '500Nm@1850-5000rpm',
        '760nm@2000-3000rpm', '350nm@1450-4800rpm', '500Nm@1600-4500rpm',
        '750nm@1800-5860rpm', '450Nm@1500-2000rpm', '750nm@1800-5600rpm',
        '620nm@2000–2500rpm', '450Nm@1380-5000rpm', '430Nm',
        '500nm@1600-4500rpm', '450Nm@1500–5200rpm', '600nm@2600-5600rpm',
        '400nm@1750–2500rpm', '650Nm@2750-5500rpm', '620Nm@2000-2500rpm',
        '620Nm@1500–2500rpm', '620nm@2000–2500', '1600Nm@2000-6000rpm',
        '1250Nm@2200-5500rpm', '250Nm@1400-2400rpm', '760Nm@3000-5250rpm',
        '770nm@3250rpm', '760Nm@3000-5750rpm', '718Nm@7000rpm',
        '800Nm@6000rpm', '110Nm@4800rpm', '145Nm@4300rpm', '200Nm@1750rpm',
        '127nm@4500-5000', '145Nm@4600rpm', '160Nm@1750rpm',
        '143.8nm@4500rpm', '113.8nm@4000rpm', '99Nm@4500 rpm',
        '172Nm@1500-4000rpm', '240.26nm@1500-2750rpm',
        '250nm@1500-2750rpm', '192nm@4500rpm', '114.74nm@4200rpm',
        '191nm@4500rpm', '95.2nm@4000rpm', '171.62nm@1500-4000rpm',
        '143.8Nm@4500rpm', '240Nm@1500-2750rpm', '172nm@1500-4000rpm',
        '113.8Nm@4000rpm', '242nm@1500-3200rpm', '250Nm@1500-2750rpm',
        '416nm@2000-2750rpm', '85.3Nm@4500 rpm', '360Nm@2000-2500rpm',
        '360nm@2000-2500rpm', '176Nm@1500-2400 rpm', '250nm@2500-4000rpm',
        '350Nm@1750-2500rpm', '400nm@3000rpm', '350nm@1750-2500rpm',
        '144nm@4500rpm', '240nm@1500-2750rpm', '115nm@4200rpm',
        '440nm@1750-2750rpm', '605nm', '350nm', '560Nm@6500rpm',
        '565Nm@6500rpm', '560Nm', '600Nm@6500rpm', '565Nm',
        '720Nm@6750rpm', '850nm@2250-4500rpm', '239Nm@4300-4500rpm',
        '350Nm@5100rpm', '350nm@5100rpm', '335Nm@4600rpm',
        '202Nm@3600-5200rpm', '530Nm@3200rpm', '200Nm@2000-3500rpm',
        '420Nm@1600-2600rpm', '300Nm@1500-2500rpm', '210nm@1600-2200rpm',
        '210Nm@1600-2200rpm', '380nm@1750-3000rpm', '115Nm@3500-3600rpm',
        '300nm@1250-3000rpm', '260nm@1750-2250rpm', '320nm@1500-3000rpm',
        '200nm@1400-2200rpm', '420Nm@1600-2800rpm', '91Nm@3000rpm',
        '300nm@1600-2800rpm', '300Nm@1600-2800rpm', '300Nm@1750-2500rpm',
        '360nm@1500-2800rpm', '450Nm@1750-2800rpm', '600Nm@2000-2600rpm',
        '600Nm@2000-4000rpm', '580Nm@1750-4500rpm', '580nm',
        '580Nm@2000-5750rpm', '450nm', '500Nm@1750-4750rpm',
        '580Nm@2250-4000rpm', '650Nm@2000-4000rpm', '510Nm@4750rpm',
        '700Nm@1600-4000rpm', '600nm@1200-3200rpm', '500nm@5000-5250rpm',
        '760Nm', '650nm@1750-4500rpm', '550nm@1800-2200rpm',
        '900nm@2000-4000rpm', '500nm@1600-2400rpm', '850nm@2500-4500rpm',
        '850nm@2500-5000rpm', '400Nm@1600-2800rpm', '400Nm@3000-4000rpm',
        '520nm@1800-5800rpm', '250nm@1620-4000rpm', '400nm@3000-4000rpm',
        '300nm@1800-4000rpm', '320Nm@1650-4000rpm', '850nm@2500–3500rpm',
        '700nm@1200-3200rpm', '380Nm@1400-2400rpm', '370Nm@1800-4000rpm',
        '500Nm@1600-2400rpm', '700Nm@1200-3000rpm', '500Nm@1600-4000rpm',
        '520nm@2500-5000rpm', '320nm@1650-4000rpm', '700Nm@2100–5500rpm',
        '600Nm@1200-3200rpm', '400Nm@1600-2600rpm', '900nm@2500-4500rpm',
        '700nm@2000-4500rpm', '400nm@2800rpm', '380Nm@1200-4000rpm',
        '440nm@1800-2800rpm', '250nm@1620–4000rpm', '730nm@2500-4500rpm',
        '1020Nm', '320Nm@1400-3500rpm', '250nm@1600-3600rpm',
        '250Nm@1600-3600rpm', '373.5Nm@1500-2400rpm',
        '478.5Nm@1500-2400rpm', '220nm@3600rpm', '144nm@4400rpm', '280nm',
        '280Nm@1250rpm', '320Nm', '270Nm@1000rpm', '280Nm@1350rpm',
        '132.3Nm@3300rpm', '226Nm @ 4100rpm', '29.8@2000 (kgm@rpm)',
        '39.1 @ 2,000 (kgm@rpm)', '17.8 @ 4,250 (kgm@rpm)',
        '12.5@3,000 (kgm@rpm)', '275Nm@2000rpm', '366Nm@3500rpm',
        '441Nm@2000rpm', '175Nm@4250rpm', '350Nm@1800-3500rpm',
        '222Nm@4100rpm', '400Nm@2000-2500rpm', '160nm@2800-3600rpm',
        '152nm@2200-4400rpm', '96nm@3500rpm', '254nm@1600rpm',
        '637Nm@3300-5800rpm', '142Nm@4000rpm', '448.77nm@1960-4500rpm',
        '450Nm1950–5000', '470Nm@6100rpm', '650Nm', '530Nm@2300-5000',
        '770nm@2000-4500rpm', '500Nm', '380Nm@1950-4500rpm',
        '770nm@1960-4500rpm', '345Nm', '450Nm@1340-5300rpm',
        '420Nm@8000rpm', '550Nm@1900-5600rpm', '450Nm@6750rpm',
        '420Nm@5000–6500/5500rpm', '420Nm', '450nm@1340-5300rpm',
        '850Nm@2300to4500rpm', '400Nm@1800-4500rpm', '520Nm@1850-5000rpm',
        '620Nm@1800–4500rpm', '770Nm@1960-4500rpm', '96Nm@3500rpm',
        '72Nm@4250rpm', '91Nm@4250rpm', '900Nm@1700rpm', '820Nm@1500rpm',
        '840Nm@1650-4750rpm', '850Nm@1600rpm', '250nm@1600-3500rpm',
        '320nm@1500-4100rpm', '178nm@1750-4500rpm', '320nm@1500-3990rpm',
        '320nm@1450-4200rpm', '170nm@1750-4000rpm', '113nm@3300rpm',
        '113Nm@3300+/-100rpm', '113Nm@3300rpm', '250Nm', '170nm',
        '250Nm@1000-2000rpm', '260nm@1500-2750rpm', '200nm@1250-3000rpm',
        '95nm@3500rpm', '140nm@1500-5500rpm', '245nm', '170Nm',
        '420nm@1400-3400rpm', '500nm@1600-2800rpm', '245nm@4000rpm',
        '113nm@4400rpm', '122Nm@4400-4800rpm', '500Nm@1600-2800rpm',
        '136.8Nm@4400rpm', '221Nm@3600to5200rpm', '198nm@2800-4000rpm',
        '175Nm@1750-4000rpm', '250Nm@1600-3500rpm', '320Nm@1500-4100rpm',
        '640Nm@1740rpm', '350Nm', '300nm', '440Nm@1500-3000rpm', '660Nm'],
      dtype=object)
In [149]:
def nm_rpm_preprocess(text):
    try:
        global nm_final_df,k,names
        nm=[]
        rpm=[]
        text=str(text)
        split_value=text.split("@",1)
        split_value.reverse()
        backup_split_value=split_value
        if len(split_value)==1:
            check="".join(re.findall('[a-zA-Z]',split_value[0])).lower()
            if check=="nmrpm":
                _replace=split_value[0].replace("nm","nm@")
                lists=_replace.split("@")
                split_value=[]
                split_value.append(lists[1])
                split_value.append(lists[0])
            elif check=="nm":
                try:
                    _replace=split_value[0].replace("Nm","nm@")
                    if "@" not in _replace:
                        raise Exception("own exception")
                except:
                    _replace=split_value[0].replace("nm","nm@")
                lists=_replace.split("@")
                split_value=[]
                split_value.append(lists[1])
                split_value.append(lists[0])
                if split_value[0]=="":
                    split_value=backup_split_value
        if len(split_value)==2:
            first="".join(re.findall('[a-zA-Z]',split_value[0])).lower()
            if first=="torpm":
                split_value[0]=split_value[0].replace("to","-")
            if first=="bhp":
                split_value[0]=split_value[0].replace(first,"rpm")
        j=0
        Activate=False
        for i in split_value:
            if len(split_value)==2:
                check="".join(re.findall('[a-zA-Z]',i)).lower()
                if check=="kgmrpm":
                    try:
                        rpm.append((float(i.replace("".join(re.findall("[\sa-zA-Z\@\(\)]",i)).strip(),""))))
                        Activate=True
                    except:
                        first=i.replace("".join(re.findall("[\sa-zA-Z\@\(\)]",i)).strip(),"")
                        rpm.append(float(first.replace("".join(re.findall('\,',first)).strip(),"")))
                        Activate=True
                else:
                    if Activate==True:
                        convert_into_nm=(float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))*9.80665)
                        nm.append(convert_into_nm)
                        Activate=False
                    else:
                        if check=="":
                            if j==1:
                                nm.append((float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))))
                            elif j==0:
                                try:
                                    rpm.append((float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))))
                                except:
                                    try:
                                        intial=i.replace("".join(re.findall("[a-zA-Z]",i)),"")
                                        lists=intial.split("-")
                                        average_of_rpm=(float(lists[0])+float(lists[1]))/2
                                        rpm.append(average_of_rpm)
                                    except:
                                        lists=intial.split("–")
                                        average_of_rpm=(float(lists[0])+float(lists[1]))/2
                                        rpm.append(average_of_rpm)
                        else:
                            if check=="nm":
                                _nm=float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))
                                nm.append(_nm)
                            elif check=="kgm":
                                convert_into_nm=(float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))*9.80665)
                                nm.append(convert_into_bhp)
                            elif check=="rpm":
                                try:
                                    _rpm=float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))
                                    rpm.append(_rpm)
                                except:
                                    try:
                                        intial=i.replace("".join(re.findall("[a-zA-Z]",i)),"")
                                        try:
                                            lists=intial.split("-")
                                            try:
                                                average_of_rpm=(float(lists[0])+float(lists[1]))/2
                                            except:
                                                first=float(lists[0])
                                                second=lists.aplit("/")
                                                average_of_rpm=second[1]
                                            rpm.append(average_of_rpm)
                                        except:
                                            lists=intial.split("–")
                                            try:
                                                average_of_rpm=(float(lists[0])+float(lists[1]))/2
                                            except:
                                                second=lists[1].split("/")
                                                average_of_rpm=float(second[1])
                                            rpm.append(average_of_rpm)
                                    except:
                                        try:
                                            intial=i.replace("".join(re.findall("[a-zA-Z]",i)),"")
                                            lists=intial.split("+-")
                                            _first=float(lists[0])#take the average value
                                            rpm.append(_first)
                                        except:
                                            intial=i.replace("".join(re.findall("[a-zA-Z]",i)),"")
                                            lists=intial.split("+/-")
                                            _first=float(lists[0])#take the average value
                                            rpm.append(_first)

            else:
                check="".join(re.findall('[a-zA-Z]',i)).lower()
                if check=="nm":
                    nm.append((float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))))
                    rpm.append(0)
                elif check=="rpm":
                    rpm.append((float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))))
                    nm.append(0)
                elif check=="nan":
                    nm.append(np.nan)
                    rpm.append(np.nan)
                else:
                    nm.append((float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))))
                    rpm.append(0)
            j=j+1 

        temp=pd.DataFrame({
        "NM":nm,
        "NM_RPM":rpm
        })
        nm_final_df=pd.concat([nm_final_df,temp],axis=0)
        print(k)
        k=k+1
    except:
        print("UnSuccessfully executed",k)
        names.append(main["Max Torque (nm@rpm)"].iloc[k])
        k=k+1
In [223]:
# 113Nm@4400rpm
# 205Nm@1750-3250rpm
# 16 @ 2,000 (kgm@rpm)
# 515Nm@4250+-50rpm
# 830Nm
# 540
# 420Nm@5000–6500/5500rpm
# 450Nm1950–5000
# 850Nm@2300to4500rpm
# 113Nm@3300+/-100rpm
# 370nm@1600-4300bhp

names=[]
k=0
nm_final_df=pd.DataFrame()
pd.Series(main["Max Torque (nm@rpm)"]).apply(nm_rpm_preprocess)
In [ ]:
nm_final_df.shape
In [ ]:
main.shape
In [150]:
main["Max Torque (nm@rpm)"].iloc[124596]
Out[150]:
'448.77nm@1960-4500rpm'
In [151]:
nm_final_df[["NM","NM_RPM"]].iloc[124596]
Out[151]:
NM         448.77
NM_RPM    3230.00
Name: 124596, dtype: float64
In [60]:
nm_final_df=nm_final_df.reset_index(drop=True)
In [61]:
main=main.reset_index(drop=True)
In [62]:
main=pd.concat([main,nm_final_df],axis=1)
In [63]:
main.drop(["Max Torque (nm@rpm)"],axis=1,inplace=True)
In [64]:
main.drop(["Unnamed: 0"],axis=1,inplace=True)
In [65]:
main.columns
Out[65]:
Index(['Model', 'Brand', 'Varient', 'ARAI Mileage(Km/L)',
        'Engine Displacement (cc)', 'Seating Capacity', 'Boot Space (Litres)',
        'Body Type', 'Fuel Type', 'No. of cylinder',
        ...
        'Tyre Pressure Monitor', 'Rain Sensing Wiper', 'Turbo Charger',
        'Air Quality Control', 'Traction Control',
        'Vehicle Stability Control System', 'BHP', 'RPM', 'NM', 'NM_RPM'],
      dtype='object', length=153)
In [66]:
main["TransmissionType"]
Out[66]:
0            Manual
1            Manual
2            Manual
3         Automatic
4            Manual
            ...    
160026    Automatic
160027    Automatic
160028    Automatic
160029    Automatic
160030    Automatic
Name: TransmissionType, Length: 160031, dtype: object
In [67]:
main["TransmissionType"].unique()
Out[67]:
array(['Manual', 'Automatic'], dtype=object)
In [68]:
plt.figure(figsize=(15,5))
sns.countplot(main["TransmissionType"])
Out[68]:
<AxesSubplot:xlabel='TransmissionType', ylabel='count'>
In [69]:
main["Fuel Tank Capacity"]
Out[69]:
0         37.0
1         40.0
2         48.0
3         37.0
4         27.0
          ... 
160026     NaN
160027     NaN
160028    60.0
160029     NaN
160030     NaN
Name: Fuel Tank Capacity, Length: 160031, dtype: float64
In [70]:
main["Multi-function Steering Wheel"]
Out[70]:
0         YES
1         NaN
2         YES
3         YES
4          NO
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Multi-function Steering Wheel, Length: 160031, dtype: object
In [71]:
main.columns
Out[71]:
Index(['Model', 'Brand', 'Varient', 'ARAI Mileage(Km/L)',
        'Engine Displacement (cc)', 'Seating Capacity', 'Boot Space (Litres)',
        'Body Type', 'Fuel Type', 'No. of cylinder',
        ...
        'Tyre Pressure Monitor', 'Rain Sensing Wiper', 'Turbo Charger',
        'Air Quality Control', 'Traction Control',
        'Vehicle Stability Control System', 'BHP', 'RPM', 'NM', 'NM_RPM'],
      dtype='object', length=153)
In [72]:
#we handle the nan values after done this
main["Multi-function Steering Wheel"].unique()
Out[72]:
array(['YES', nan, 'NO', 'OPTIONAL'], dtype=object)
In [73]:
plt.figure(figsize=(15,5))
sns.countplot(main["Multi-function Steering Wheel"])
Out[73]:
<AxesSubplot:xlabel='Multi-function Steering Wheel', ylabel='count'>
In [74]:
main["Touch Screen"]
Out[74]:
0         YES
1         NaN
2         YES
3         YES
4          NO
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Touch Screen, Length: 160031, dtype: object
In [75]:
main["Touch Screen"].unique()
Out[75]:
array(['YES', nan, 'NO', 'OPTIONAL'], dtype=object)
In [76]:
plt.figure(figsize=(15,5))
sns.countplot(main["Touch Screen"])
Out[76]:
<AxesSubplot:xlabel='Touch Screen', ylabel='count'>
In [77]:
main.columns[15]
Out[77]:
'Alloy Wheels'
In [78]:
main["Engine Start Stop Button"]
Out[78]:
0         YES
1         NaN
2         YES
3         YES
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Engine Start Stop Button, Length: 160031, dtype: object
In [79]:
main["Engine Start Stop Button"].unique()
Out[79]:
array(['YES', nan, 'NO'], dtype=object)
In [80]:
plt.figure(figsize=(15,5))
sns.countplot(main["Engine Start Stop Button"])
Out[80]:
<AxesSubplot:xlabel='Engine Start Stop Button', ylabel='count'>
In [81]:
main.columns[16]
Out[81]:
'Power Windows Rear'
In [82]:
main["Alloy Wheels"]
Out[82]:
0         YES
1         NaN
2         YES
3         YES
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Alloy Wheels, Length: 160031, dtype: object
In [83]:
main["Alloy Wheels"].unique()
Out[83]:
array(['YES', nan, 'NO', 'OPTIONAL', '4'], dtype=object)
In [84]:
plt.figure(figsize=(15,5))
sns.countplot(main["Alloy Wheels"])
Out[84]:
<AxesSubplot:xlabel='Alloy Wheels', ylabel='count'>
In [85]:
main.loc[main["Alloy Wheels"]=="4"][["Model","Place","Brand","Varient","Alloy Wheels"]]
Out[85]:
Model Place Brand Varient Alloy Wheels
48957 BMW On-Road Price in Pune : (Not Available in Sola... X3 X3 xDrive30i M Sport 4
48966 BMW On-Road Price in Ranchi : X3 X3 xDrive30i SportX Plus 4
48969 BMW On-Road Price in Kolkata : (Not Available in G... X3 X3 xDrive30i M Sport 4
49007 BMW On-Road Price in Ranchi : X3 X3 xDrive30i M Sport 4
49015 BMW On-Road Price in Faridabad : X3 X3 xDrive30i M Sport 4
... ... ... ... ... ...
52739 BMW On-Road Price in New Delhi : X3 X3 xDrive30i SportX Plus 4
52760 BMW On-Road Price in Kolkata : (Not Available in G... X3 X3 xDrive30i M Sport 4
52783 BMW On-Road Price in Pune : X3 X3 xDrive30i SportX Plus 4
52816 BMW On-Road Price in Vadodara : X3 X3 xDrive30i M Sport 4
52845 BMW On-Road Price in Surat : X3 X3 xDrive30i SportX Plus 4

187 rows × 5 columns

In [86]:
#The above problem occured to due to same tag class name
#The problem can be resolved by using the domain knowledge
#so i see the bmw x3 which has three varient all of them have alloy wheels
def make_correction(text):
    if text=="4":
        return text.replace("4","YES")
    else:
        return text
In [87]:
main["Alloy Wheels"]=pd.Series(main["Alloy Wheels"]).apply(make_correction)
In [88]:
plt.figure(figsize=(15,5))
sns.countplot(main["Alloy Wheels"])
Out[88]:
<AxesSubplot:xlabel='Alloy Wheels', ylabel='count'>
In [89]:
main["Alloy Wheels"].unique()
Out[89]:
array(['YES', nan, 'NO', 'OPTIONAL'], dtype=object)
In [90]:
main["Power Windows Rear"]
Out[90]:
0         YES
1         NaN
2         YES
3         YES
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Power Windows Rear, Length: 160031, dtype: object
In [91]:
main["Power Windows Rear"].unique()
Out[91]:
array(['YES', nan, 'NO'], dtype=object)
In [92]:
plt.figure(figsize=(15,5))
sns.countplot(main["Power Windows Rear"])
Out[92]:
<AxesSubplot:xlabel='Power Windows Rear', ylabel='count'>
In [93]:
main["Wheel Covers"]
Out[93]:
0          NO
1         NaN
2         NaN
3          NO
4          NO
          ... 
160026    NaN
160027    NaN
160028     NO
160029    NaN
160030    NaN
Name: Wheel Covers, Length: 160031, dtype: object
In [94]:
main["Wheel Covers"].unique()
Out[94]:
array(['NO', nan, 'YES'], dtype=object)
In [95]:
plt.figure(figsize=(15,5))
sns.countplot(main["Wheel Covers"])
Out[95]:
<AxesSubplot:xlabel='Wheel Covers', ylabel='count'>
In [96]:
main["Driver Airbag"]
Out[96]:
0         YES
1         YES
2         YES
3         YES
4         YES
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Driver Airbag, Length: 160031, dtype: object
In [97]:
main["Driver Airbag"].unique()
Out[97]:
array(['YES', nan, 'NO'], dtype=object)
In [98]:
plt.figure(figsize=(15,5))
sns.countplot(main["Driver Airbag"])
Out[98]:
<AxesSubplot:xlabel='Driver Airbag', ylabel='count'>
In [99]:
main["Air Conditioner"]
Out[99]:
0         YES
1         YES
2         YES
3         YES
4         YES
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Air Conditioner, Length: 160031, dtype: object
In [100]:
main["Air Conditioner"].unique()
Out[100]:
array(['YES', nan, 'NO', '5', '4', '2', '1890', '2923', '506mm', '348'],
      dtype=object)
In [101]:
plt.figure(figsize=(15,5))
sns.countplot(main["Air Conditioner"])
Out[101]:
<AxesSubplot:xlabel='Air Conditioner', ylabel='count'>
In [3]:
#This problem occured due to same class name in tag during collecting the data
def preprocess_to_null_out(catagorie,changer):
    catagorie=str(catagorie)
    if catagorie=="nan":
        return np.nan
    if changer==False:
        if catagorie!="YES" and catagorie!="NO":
            return np.nan
        else:
            return catagorie
    else:
        if catagorie!="YES" and catagorie!="NO" and catagorie!="OPTIONAL":
            return np.nan
        else:
            return catagorie
In [103]:
main["Air Conditioner"]=main["Air Conditioner"].apply(lambda x:preprocess_to_null_out(x,False))
In [104]:
plt.figure(figsize=(15,5))
sns.countplot(main["Air Conditioner"])
Out[104]:
<AxesSubplot:xlabel='Air Conditioner', ylabel='count'>
In [105]:
main["Power Adjustable Exterior Rear View Mirror"]
Out[105]:
0         YES
1         NaN
2         YES
3         YES
4          NO
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Power Adjustable Exterior Rear View Mirror, Length: 160031, dtype: object
In [106]:
main["Power Adjustable Exterior Rear View Mirror"].unique()
Out[106]:
array(['YES', nan, 'NO'], dtype=object)
In [107]:
plt.figure(figsize=(15,5))
sns.countplot(main["Power Adjustable Exterior Rear View Mirror"])
Out[107]:
<AxesSubplot:xlabel='Power Adjustable Exterior Rear View Mirror', ylabel='count'>
In [108]:
main["Automatic Climate Control"]
Out[108]:
0              YES
1              NaN
2              YES
3              YES
4              NaN
            ...   
160026    OPTIONAL
160027         NaN
160028         YES
160029         NaN
160030         NaN
Name: Automatic Climate Control, Length: 160031, dtype: object
In [109]:
main["Automatic Climate Control"].unique()
Out[109]:
array(['YES', nan, 'NO', 'OPTIONAL', '5', '4', '2 Zone', '3 Zone',
        '4 Zone', '2923', '2765'], dtype=object)
In [110]:
plt.figure(figsize=(15,5))
sns.countplot(main["Automatic Climate Control"])
Out[110]:
<AxesSubplot:xlabel='Automatic Climate Control', ylabel='count'>
In [111]:
main["Automatic Climate Control"]=main["Automatic Climate Control"].apply(lambda x:preprocess_to_null_out(x,True))
In [112]:
plt.figure(figsize=(15,5))
sns.countplot(main["Automatic Climate Control"])
Out[112]:
<AxesSubplot:xlabel='Automatic Climate Control', ylabel='count'>
In [113]:
main["Automatic Climate Control"]=main["Automatic Climate Control"].apply(lambda x:preprocess_to_null_out(x,True))
In [114]:
plt.figure(figsize=(15,5))
sns.countplot(main["Air Conditioner"])
Out[114]:
<AxesSubplot:xlabel='Air Conditioner', ylabel='count'>
In [115]:
main.columns[23]
Out[115]:
'Fog Lights - Front'
In [116]:
main["Anti Lock Braking System"]
Out[116]:
0         YES
1         YES
2         YES
3         YES
4         YES
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Anti Lock Braking System, Length: 160031, dtype: object
In [117]:
main["Anti Lock Braking System"].unique()
Out[117]:
array(['YES', nan, 'NO', 'OPTIONAL'], dtype=object)
In [118]:
plt.figure(figsize=(15,5))
sns.countplot(main["Anti Lock Braking System"])
Out[118]:
<AxesSubplot:xlabel='Anti Lock Braking System', ylabel='count'>
In [119]:
main["Anti Lock Braking System"]
Out[119]:
0         YES
1         YES
2         YES
3         YES
4         YES
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Anti Lock Braking System, Length: 160031, dtype: object
In [120]:
main["Anti Lock Braking System"].unique()
Out[120]:
array(['YES', nan, 'NO', 'OPTIONAL'], dtype=object)
In [121]:
plt.figure(figsize=(15,5))
sns.countplot(main["Anti Lock Braking System"])
Out[121]:
<AxesSubplot:xlabel='Anti Lock Braking System', ylabel='count'>
In [122]:
main["Fog Lights - Front"]
Out[122]:
0         YES
1         NaN
2         YES
3         YES
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Fog Lights - Front, Length: 160031, dtype: object
In [123]:
main["Fog Lights - Front"].unique()
Out[123]:
array(['YES', nan, 'NO', '1039'], dtype=object)
In [124]:
plt.figure(figsize=(15,5))
sns.countplot(main["Fog Lights - Front"])
Out[124]:
<AxesSubplot:xlabel='Fog Lights - Front', ylabel='count'>
In [125]:
main["Fog Lights - Front"]=main["Fog Lights - Front"].apply(lambda x:preprocess_to_null_out(x,False))
In [126]:
plt.figure(figsize=(15,5))
sns.countplot(main["Fog Lights - Front"])
Out[126]:
<AxesSubplot:xlabel='Fog Lights - Front', ylabel='count'>
In [127]:
main["Power Windows Front"]
Out[127]:
0         YES
1         NaN
2         YES
3         YES
4          NO
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Power Windows Front, Length: 160031, dtype: object
In [128]:
main["Power Windows Front"].unique()
Out[128]:
array(['YES', nan, 'NO'], dtype=object)
In [129]:
plt.figure(figsize=(15,5))
sns.countplot(main["Power Windows Front"])
Out[129]:
<AxesSubplot:xlabel='Power Windows Front', ylabel='count'>
In [130]:
main["Passenger Airbag"]
Out[130]:
0              YES
1         OPTIONAL
2              YES
3              YES
4              YES
            ...   
160026         YES
160027         NaN
160028         YES
160029         NaN
160030         NaN
Name: Passenger Airbag, Length: 160031, dtype: object
In [131]:
main["Passenger Airbag"].unique()
Out[131]:
array(['YES', 'OPTIONAL', nan, 'NO'], dtype=object)
In [132]:
plt.figure(figsize=(15,5))
sns.countplot(main["Passenger Airbag"])
Out[132]:
<AxesSubplot:xlabel='Passenger Airbag', ylabel='count'>
In [133]:
main["Power Steering"]
Out[133]:
0         YES
1         NaN
2         YES
3         YES
4         YES
          ... 
160026      5
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Power Steering, Length: 160031, dtype: object
In [134]:
main["Power Steering"].unique()
Out[134]:
array(['YES', nan, 'NO', '5', '4', '2', '1890', '2923', '506mm', '3200',
        '348', '2765'], dtype=object)
In [135]:
plt.figure(figsize=(15,5))
sns.countplot(main["Power Steering"])
Out[135]:
<AxesSubplot:xlabel='Power Steering', ylabel='count'>
In [136]:
main["Power Steering"]=main["Power Steering"].apply(lambda x:preprocess_to_null_out(x,False))
In [137]:
plt.figure(figsize=(15,5))
sns.countplot(main["Power Steering"])
Out[137]:
<AxesSubplot:xlabel='Power Steering', ylabel='count'>
In [138]:
main["Engine Type"]
Out[138]:
0                       NaN
1                      G12B
2         K15B Smart Hybrid
3                       NaN
4                      K10C
                ...        
160026                  NaN
160027                  NaN
160028                  NaN
160029                  NaN
160030                  NaN
Name: Engine Type, Length: 160031, dtype: object
In [139]:
main["Engine Type"].unique()
Out[139]:
array([nan, 'G12B', 'K15B Smart Hybrid', 'K10C',
        'K15 Smart Hybrid Petrol Engine', 'K Series Dual jet', 'VVT',
        'F8D Petrol Engine', 'K15C Smart Hybrid', '1.2 L K Series Engine',
        'Multi Point Fuel Injection G12B BS—VI', 'K15C',
        'K15C Smart Hybrid`', 'K12N', 'F8D', 'TDCi Diesel Engine',
        'Petrol Engine', 'In-Line Engine', 'Ti-VCT Petrol Engine',
        'Diesel Engine', 'TDCI Diesel Engine', 'Duratorq Diesel Engine',
        '1.5 L TDCi', 'Duratec Petrol Engine', '1.2 litre Petrol Engine',
        '5.0L Ti-VCT V8', 'EcoBlue engine', '1.5 litre Diesel Engine',
        '1.5 L Petrol Engine', '1.5 L TiVCT', '1.5 L Diesel Engine',
        'Ti VCT Petrol Engine', 'Ecoboost Petrol Engine',
        '1.2 L Petrol Engine', '1.5L Petrol Engine', '3.0L V6 TFSI',
        '2.0 L 40 TFS', '3.0 litre V6 TFSI', 'In Line Petrol Engine', 'V6',
        '2.0 L TFSI petrol engine', '55 TFSI quattro tiptronic',
        '2.0 L TFSI', 'V8 Twin Turbo Engine', 'TFSI Petrol Engine',
        '4.0L TFSI quattro Engine', '3.0 L V6 TFSI Petrol engine',
        'Twin Turbocharged V8 Engi',
        '4.0 V8 twin-turbocharged  petrol engine',
        'Twin Turbocharger V8 Engi', 'Twin Turbocharged W12 Eng',
        'V8 Engine', 'TwinPower Turbo 6-cylinder engine',
        'M TwinPower Turbo inline', 'TwinPower Turbo 4-cylinder',
        'TwinPower Turbo 4-Cylinder engine',
        'TwinPower Turbo 4 Cylinder Petrol Engine',
        'Twin  Turbo 4 Cylinder  Diesel Engine',
        'TwinPower Turbo inline 4-cylinder engine',
        'Twin Turbo 6 Cylinder Petrol Engine',
        'TwinPower Turbo 6-cylinder',
        'TwinPower Turbo inline 6-cylinder engine',
        'Twin Power Turbo Engine', '4.4 Litre V8 Twin Turbo Diesel Engine',
        'TwinPower Turbo inline 6-cylinder petrol',
        'TwinPower Turbo inline 6 cylinder engine',
        'BUGATTI 2-Stage Turbochar', 'W16 Engine', 'FM 2.6 CR CD',
        'V8 Turbo', '90-degree V8 twin turbo Engine', 'V8 - 90° turbo',
        'V12 - 65°', 'V8-90°-turbo', '1.2 i-VTEC',
        'Water Cooled Inline i-VTEC DOHC with VTC', 'i-DTEC', 'i-VTEC',
        'water cooled inline 4 cylinder',
        'Water Cooled Inline i-DTEC DOHC', '1.5 L MPi Petrol',
        '1.2 l Kappa dual petrol', '1.1 L Petrol', '1.0 Kappa Turbo GDi',
        '1.5 l U2 diesel', 'Kappa 1.0 l turbo GDi', '1.5 L U2 CRDi Diesel',
        'Nu 2.0 Petrol', '1.2 l Kappa petrol', '2.0 l petrol MPi engine',
        '1.2 Kappa petrol', '1.0 l Turbo GDi petrol', '1.5 l MPi petrol',
        '1.5 l diesel CRDi engine', '1.5 U2 CRDi',
        '1.0 l Kappa turbo GDi petrol', '1.2 Kappa Petrol',
        '1.4 L Turbo GDi Petrol', '1.0 Turbo GDI', '1.5 l U2 CRDi diesel',
        'R 2.0 Diesel', '1.0 Turbo GDi petrol', '1.1 L Bi-Fuel',
        '1.2 l Bi-fuel', '1.9L Ddi Diesel', 'VGS Turbo Intercooled',
        '1.4L MultiAir Petrol', '2.0 L Multijet Diesel',
        '2.0L GME T4 DI TC', '2.0L Multijet Diesel',
        '2.0 Multijet II Diesel', '1.4 L MultiAir Petrol',
        'Smartstream G1.5', 'SMARTSTREAM G1.5', 'Smartstream G1.0 T - GDi',
        'Smartstream G1.4', '1.5 L CRDi VGT', '1.5 L CRDi WGT',
        'SMARTSTREAM G1.2', 'Smartstream G1.4 T-GDi', 'D2.2L VGT Diesel',
        '5.2 V10 Petrol Engine', 'V10 cylinder 90°,dual injection',
        'V12, 60°, MPI Petrol Engine', 'V8 bi-turbo engine',
        '2.0 Litre P300 Petrol Engine', '3.0 Litre diesel Engine',
        '3.0 Litre Petrol Engine', 'petrol Engine',
        '3.0 Litre Diesel Engine', 'TD4 Diesel Engine',
        '2.5-liter, 4-cyl. in-line', '3.5-literV6FourCam',
        '8GR FXS V6 24-valve DOHC with Dual VVT-i', '2 GR FXS',
        '2.5-liter L4 engine', 'V8 32-Valve DOHC Dual VVT',
        '1.2L Turbo Petrol', 'mStallion', 'mHawk', '2.2L Diesel Engine',
        '1.5L Turbo Diesel', '1.5 Litre mHAWK 75 BSVI Diesel Engine',
        'mHAWK75 BSVI', '2.0L Turbo Petrol', 'mFALCON G80',
        'mStallion 150 TGDi Engine', 'mHAWk100', 'm2DiCR 4 Cyl 2.5L TB',
        '2.2 L Turbo Diesel', 'Electric Engine', 'mHawk 130 Engine',
        'D15 1.5 Litre Diesel Engine', 'mhawk',
        '4.0-litre V8 32 valve Twin Turbo', '3.0L V6 Diesel Engine',
        'V-Type Diesel Engine', 'V-Type  Engine', '3.0L V6 Petrol Engine',
        'V-Type Petrol Engine', 'V Type Petrol Engine', 'V Type Engine',
        '3.0 L in-line 6 cylinder engine', '2.0-litre in line engine',
        '4.0-litre V8 Biturbo', 'OM654M', 'V12',
        '4.0-litre V8 biturbo engine', '4.0-LITER V8 BITURBO ENGINE',
        'In-Line 4 Cylinder diesel Engine',
        'AMG 2.0-LITRE 4-CYLINDER ENGINE', '3.0-litre L6 in-line engine',
        'L4 200', 'AMG 35 4MATIC', 'In-Line 4 Cylinder Petrol Engine',
        'In-Line Petrol Engine', 'OM 656 Engine',
        '3.0-litre 6-cylinder In-lineTurbo Engine', 'V8',
        '3.0-litre diesel', '2.2L V 220d', '3.0-litre petrol',
        'M256 engine', 'V6 bi-turbo engine', 'M 264 petrol engine',
        '4.0-L V8 BITURBO engine', '3.0-LITRE V6 BITURBO ENGINE',
        'L6 diesel Engine', '2.0L V 220 d', 'V8 biturbo engine',
        'OM 654 Diesel engine', '2.0-litre OM654 diesel engine',
        '4.0-litre petrol', 'L4 200d', '2.0 L Turbocharged Diesel',
        '1.5L Turbocharged Intercooled', '1.5 L Turbocharged Intercooled',
        'DIESEL 2.0L TURBO', 'DIESEL 2.0L TWIN TURBO',
        '2.0L Turbocharged Diesel', '220TURBO', 'VTi-TECH',
        'Petroll Engine', 'Petrol', 'Intercooled Turbocharged',
        'Intercooled Turbocharger', '4 Cylinder In-Line Petrol',
        'DI-Diesel Engine', '2.4 litre 16 Valve Petrol',
        'Common Rail DI-D Engine', 'HRA0 1.0 TURBO PETROL',
        'B4D 1.0 NA PETROL', '1.3 L HR13DDT Turbo Petrol',
        'V6 Twin Turbo Petrol Engine', '1.5 L HR15 Petrol',
        '4.0 Litre V6 Turbo Engine', '2.0L Mid-engine',
        'Twin Turbo V8 Engine', 'V6 Petrol Engine',
        'naturally aspirated boxer engine', 'Twin-Turbocharged Engine',
        '4.0L Petrol engine', '3.0 Litre V6 Engine', 'Twin-turbo V8',
        'V6 Diesel Engine', '4.0 V8 twin-turbo', 'V8 Petrol Engine',
        '1.0L ENERGY', '0.8 SCe', '1.0 SCe', '1.0L TURBO',
        'V12 Petrol Engine', '6.75-litre V12 engine', '1.5 L TSI Petrol',
        '2.0 L Turbocharged Petrol Engine', '1.0 L TSI Petrol',
        '1.0 TSI Petrol Engine', '1.5 TSI Petrol', '1.5 TSI Petrol Engine',
        '1.0 TSI Petrol', '2.0 L TSI Petrol Engine',
        '2.0L Turbocharged Petrol', 'Kryotec 2.0 L Turbocharged Engine',
        '1.2l Turbocharged Revotron Engine', '1.2 L Revotron',
        '1.2 l Revotron Engine', 'Revotron 1.2 l BS6 Engine',
        '1.2 l Revotron', 'TATA 4SP CR TCIC',
        '1.5l Turbocharged Revotorq Engine', '1.5 l Turbocharged Revotorq',
        '1.2L Revotron Engine', '1.2 l i-Turbo', '1.2 L i-CNG',
        '2.8 L Diesel engine', '2.8 L Diesel Engine', '2.7L Dual VVT',
        '2.7L Petrol Engine', '2.7 L Dual VVT',
        '2.5L Dynamic Force Engine', 'Gasoline Hybrid and E-FOUR',
        '1.0L TSI', 'TSI Petrol Engine', '1.5L TSI EVO with ACT',
        '2.0 TSI', 'Twin Turbo & Super Charge Petrol Engine',
        'Turbo Petrol Engine'], dtype=object)
In [140]:
# Overall repetition
# Twin Turbocharged V8 Engi == Twin Turbocharger V8 Engi
# TwinPower Turbo 4 Cylinder Petrol Engine == TwinPower Turbo 4-Cylinder engine
# 2.0 L TFSI petrol engine == 2.0 L 40 TFS
# 2.0L Multijet Diesel == 2.0 L Multijet Diesel
# 1.5 l MPi petrol == 1.5 L MPi Petrol
# 1.2 Kappa petrol == 1.2 l Kappa petrol
# SMARTSTREAM G1.5 == Smartstream G1.5
# 3.0 Litre diesel Engine == 3.0 Litre Diesel Engine
# V-Type Petrol Engine == V Type Petrol Engine
# Petroll Engine == petrol Engine == Petrol
# V-Type  Engine == V Type Engine
# 4.0-litre V8 biturbo engine == 4.0-LITER V8 BITURBO ENGINE == 4.0-litre V8 Biturbo
# 2.0 L Turbocharged Petrol Engine == 2.0L Turbocharged Petrol
# 2.8 L Diesel engine == 2.8 L Diesel Engine
# 2.7L Dual VVT==2.7 L Dual VVT
# 1.5 TSI Petrol == 1.5 TSI Petrol Engine
# 1.0 TSI Petrol Engine == 1.0 TSI Petrol == 1.0 L TSI Petrol'
In [141]:
def preprocess_Engine_Type(text,varient):
    global varient_type,fuel_types
    Overall_CNGS=["CNG","ALPG"]
    text=str(text)
    if text=="nan":
        return np.nan
    if (Overall_CNGS[0] in varient)or(Overall_CNGS[1] in varient):
        if "Petrol" in text:
            text=text.replace("Petrol","")
    if "`" in text:
        text=text.replace("`","")
    if ("Petrol" not in text)and("Diesel" not in text)and("CNG" not in text)and("Electric" not in text)and("petrol" not in text)and("diesel" not in text)and("electric" not in text)and("cng" not in text)and("PETROL" not in text):
        fuel_type=""
        if varient in varient_type:
            varient_index=varient_type.index(varient)
            fuel_type=fuel_types[varient_index]
        else:
            if (Overall_CNGS[0] in varient)or(Overall_CNGS[1] in varient):
                fuel_type="Cng"
                varient_type.append(varient)
                fuel_types.append(fuel_type)
            else:
                fuel_type=main.loc[main["Engine Type"]=="{}".format(text)][["Fuel Type"]].iloc[0,0]
                varient_type.append(varient)
                fuel_types.append(fuel_type)
        fuel_type=fuel_type.title()
        text=text.title()
        if "Engine" not in text:
            text=text+" {}".format(fuel_type)
        else:
            index_engine=text.find("Engine")
            if (text[index_engine-1]==" ")and(text[index_engine]=="E"):
                text=text.replace(" E"," {} E".format(fuel_type))
    text=text.title()
    if (text[text.find("L")-1] != " ")and(text[text.find("L")+1]==" "):
        text=text.replace("L"," L")
    if "litre" in text:
        text=text.title()
    if (text[text.find("L")-1] == " ")and(text[text.find("L")+1]==" "):
        text=text.replace("L","Litre")
    if "Engine" not in text:
        text=text+" Engine"
    if "-" in text:
        text=text.replace("-"," ")
    #Direct Handling
    if "Liter" in text:
        text=text.replace("Liter","Litre")
    if "Petroll" in text:
        text=text.replace("Petroll","Petrol")
    if "Engine" not in text:
        text=text.title()+" Engine"
    if "   " in text:
        text=text.replace("   "," ")
    if "  " in text:
        text=text.replace("  "," ")
    #Two Exception case:
    if text=="1.5 Tsi Petrol Engine":
        return "1.5 Litre Tsi Petrol Engine"
    if text=="1.0 Tsi Petrol Engine":
        return "1.0 Litre Tsi Petrol Engine"
    return text
In [142]:
#Maruti brand repetition
# K15C Smart Hybrid == K15C Smart Hybrid`
In [143]:
main.loc[main.Model=="Maruti"][["Engine Type"]]["Engine Type"].unique()
Out[143]:
array([nan, 'G12B', 'K15B Smart Hybrid', 'K10C',
        'K15 Smart Hybrid Petrol Engine', 'K Series Dual jet', 'VVT',
        'F8D Petrol Engine', 'K15C Smart Hybrid', '1.2 L K Series Engine',
        'Multi Point Fuel Injection G12B BS—VI', 'K15C',
        'K15C Smart Hybrid`', 'K12N', 'F8D'], dtype=object)
In [144]:
varient_type=[]
fuel_types=[]
_maruti=main.loc[main.Model=="Maruti"][["Engine Type","Varient"]]
In [145]:
_maruti["Engine Type"]=_maruti.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [146]:
_maruti["Engine Type"].unique()
Out[146]:
array([nan, 'G12B Petrol Engine', 'K15B Smart Hybrid Petrol Engine',
        'K10C Petrol Engine', 'K15 Smart Hybrid Petrol Engine',
        'K Series Dual Jet Petrol Engine', 'Vvt Petrol Engine',
        'F8D Cng Engine', 'K15C Smart Hybrid Petrol Engine',
        '1.2 Litre K Series Petrol Engine',
        'Multi Point Fuel Injection G12B Bs—Vi Petrol Engine',
        'K15C Cng Engine', 'K10C Cng Engine',
        'Multi Point Fuel Injection G12B Bs—Vi Cng Engine',
        'F8D Petrol Engine', 'K Series Dual Jet Cng Engine',
        'G12B Cng Engine', 'K12N Petrol Engine'], dtype=object)
In [147]:
main.loc[(main["Model"]=="Maruti") & (main["Fuel Type"]=="CNG")][["Engine Type"]]["Engine Type"].unique()
Out[147]:
array(['F8D Petrol Engine', nan, 'K15C', 'K10C',
        'Multi Point Fuel Injection G12B BS—VI', 'K Series Dual jet',
        'G12B'], dtype=object)
In [148]:
#Ford brand repetition
# 1.5 litre Diesel Engine == 1.5 L Diesel Engine
# 1.5 L Petrol Engine == 1.5L Petrol Engine
# Ti-VCT Petrol Engine == Ti VCT Petrol Engine
# 1.2 litre Petrol Engine == 1.2 L Petrol Engine
In [149]:
main.loc[main.Model=="Ford"][["Engine Type"]]["Engine Type"].unique()
Out[149]:
array(['TDCi Diesel Engine', 'Petrol Engine', 'In-Line Engine',
        'Ti-VCT Petrol Engine', 'Diesel Engine', 'TDCI Diesel Engine',
        'Duratorq Diesel Engine', '1.5 L TDCi', 'Duratec Petrol Engine',
        nan, '1.2 litre Petrol Engine', '5.0L Ti-VCT V8', 'EcoBlue engine',
        '1.5 litre Diesel Engine', '1.5 L Petrol Engine', '1.5 L TiVCT',
        '1.5 L Diesel Engine', 'Ti VCT Petrol Engine',
        'Ecoboost Petrol Engine', '1.2 L Petrol Engine',
        '1.5L Petrol Engine'], dtype=object)
In [150]:
_ford=main.loc[main.Model=="Ford"][["Varient","Engine Type"]]
In [151]:
varient_type=[]
fuel_types=[]
_ford["Engine Type"]=_ford.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [152]:
_ford["Engine Type"].unique()
Out[152]:
array(['Tdci Diesel Engine', 'Petrol Engine', 'In Line Diesel Engine',
        'Ti Vct Petrol Engine', 'Diesel Engine', 'Duratorq Diesel Engine',
        '1.5 Litre Tdci Diesel Engine', 'Duratec Petrol Engine', nan,
        '1.2 Litre Petrol Engine', '5.0 Litre Ti Vct V8 Petrol Engine',
        'Ecoblue Diesel Engine', '1.5 Litre Diesel Engine',
        '1.5 Litre Petrol Engine', 'In Line Cng Engine',
        '1.5 Litre Tivct Petrol Engine', 'Ti Vct Cng Engine',
        'Ecoboost Petrol Engine'], dtype=object)
In [153]:
_ford["Engine Type"].unique()
Out[153]:
array(['Tdci Diesel Engine', 'Petrol Engine', 'In Line Diesel Engine',
        'Ti Vct Petrol Engine', 'Diesel Engine', 'Duratorq Diesel Engine',
        '1.5 Litre Tdci Diesel Engine', 'Duratec Petrol Engine', nan,
        '1.2 Litre Petrol Engine', '5.0 Litre Ti Vct V8 Petrol Engine',
        'Ecoblue Diesel Engine', '1.5 Litre Diesel Engine',
        '1.5 Litre Petrol Engine', 'In Line Cng Engine',
        '1.5 Litre Tivct Petrol Engine', 'Ti Vct Cng Engine',
        'Ecoboost Petrol Engine'], dtype=object)
In [154]:
#Ford brand repetition
#3.0L V6 TFSI==3.0 litre V6 TFSI
#3.0 litre V6 TFSI == 3.0 L V6 TFSI Petrol engine
#2.0 L TFSI petrol engine==2.0 L TFSI
In [155]:
main.loc[main.Model=="Audi"][["Engine Type"]]["Engine Type"].unique()
Out[155]:
array(['3.0L V6 TFSI', nan, '2.0 L 40 TFS', '3.0 litre V6 TFSI',
        'In Line Petrol Engine', 'V6', '2.0 L TFSI petrol engine',
        '55 TFSI quattro tiptronic', '2.0 L TFSI', 'V8 Twin Turbo Engine',
        'TFSI Petrol Engine', '4.0L TFSI quattro Engine',
        '3.0 L V6 TFSI Petrol engine'], dtype=object)
In [156]:
_audi=main.loc[main.Model=="Audi"][["Varient","Engine Type"]]
In [157]:
varient_type=[]
fuel_types=[]
_audi["Engine Type"]=_audi.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [158]:
_audi["Engine Type"].unique()
Out[158]:
array(['3.0 Litre V6 Tfsi Petrol Engine', nan,
        '2.0 Litre 40 Tfs Petrol Engine', 'In Line Petrol Engine',
        'V6 Petrol Engine', '2.0 Litre Tfsi Petrol Engine',
        '55 Tfsi Quattro Tiptronic Petrol Engine',
        'V8 Twin Turbo Petrol Engine', 'Tfsi Petrol Engine',
        '4.0 Litre Tfsi Quattro Petrol Engine'], dtype=object)
In [159]:
#Bugati brand has no repetition
In [160]:
main.loc[main.Model=="Bugatti"][["Engine Type"]]["Engine Type"].unique()
Out[160]:
array(['BUGATTI 2-Stage Turbochar', 'W16 Engine'], dtype=object)
In [161]:
_bugatti=main.loc[main.Model=="Bugatti"][["Engine Type","Varient"]]
In [162]:
varient_type=[]
fuel_types=[]
_bugatti["Engine Type"]=_bugatti.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [163]:
_bugatti["Engine Type"].unique()
Out[163]:
array(['Bugatti 2 Stage Turbochar Petrol Engine', 'W16 Petrol Engine'],
      dtype=object)
In [164]:
#BMW brand has repetition
# TwinPower Turbo 6-cylinder engine == TwinPower Turbo 6-cylinder
# TwinPower Turbo 4-cylinder == TwinPower Turbo 4-Cylinder engine == TwinPower Turbo 4 Cylinder Petrol Engine
# Twin Power Turbo Engine
In [165]:
main.loc[main.Model=="BMW"][["Engine Type"]]["Engine Type"].unique()
Out[165]:
array(['TwinPower Turbo 6-cylinder engine', 'M TwinPower Turbo inline',
        'TwinPower Turbo 4-cylinder', 'TwinPower Turbo 4-Cylinder engine',
        'TwinPower Turbo 4 Cylinder Petrol Engine', nan,
        'Twin  Turbo 4 Cylinder  Diesel Engine',
        'TwinPower Turbo inline 4-cylinder engine',
        'Twin Turbo 6 Cylinder Petrol Engine',
        'TwinPower Turbo 6-cylinder',
        'TwinPower Turbo inline 6-cylinder engine',
        'Twin Power Turbo Engine', '4.4 Litre V8 Twin Turbo Diesel Engine',
        'TwinPower Turbo inline 6-cylinder petrol',
        'TwinPower Turbo inline 6 cylinder engine'], dtype=object)
In [166]:
_bmw=main.loc[main.Model=="BMW"][["Varient","Engine Type"]]
In [167]:
varient_type=[]
fuel_types=[]
_bmw["Engine Type"]=_bmw.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [168]:
_bmw["Engine Type"].unique()
Out[168]:
array(['Twinpower Turbo 6 Cylinder Petrol Engine',
        'M Twinpower Turbo Inline Petrol Engine',
        'Twinpower Turbo 4 Cylinder Petrol Engine',
        'Twinpower Turbo 4 Cylinder Diesel Engine', nan,
        'Twin Turbo 4 Cylinder Diesel Engine',
        'Twinpower Turbo Inline 4 Cylinder Petrol Engine',
        'Twin Turbo 6 Cylinder Petrol Engine',
        'Twinpower Turbo Inline 6 Cylinder Petrol Engine',
        'Twin Power Turbo Petrol Engine',
        '4.4 Litre V8 Twin Turbo Diesel Engine',
        'Twinpower Turbo Inline 6 Cylinder Diesel Engine'], dtype=object)
In [169]:
_bmw["Engine Type"].unique()
Out[169]:
array(['Twinpower Turbo 6 Cylinder Petrol Engine',
        'M Twinpower Turbo Inline Petrol Engine',
        'Twinpower Turbo 4 Cylinder Petrol Engine',
        'Twinpower Turbo 4 Cylinder Diesel Engine', nan,
        'Twin Turbo 4 Cylinder Diesel Engine',
        'Twinpower Turbo Inline 4 Cylinder Petrol Engine',
        'Twin Turbo 6 Cylinder Petrol Engine',
        'Twinpower Turbo Inline 6 Cylinder Petrol Engine',
        'Twin Power Turbo Petrol Engine',
        '4.4 Litre V8 Twin Turbo Diesel Engine',
        'Twinpower Turbo Inline 6 Cylinder Diesel Engine'], dtype=object)
In [170]:
#Force has no repetition
In [171]:
main.loc[main.Model=="Force"][["Engine Type"]]["Engine Type"].unique()
Out[171]:
array(['FM 2.6 CR CD'], dtype=object)
In [172]:
_force=main.loc[main.Model=="Force"][["Varient","Engine Type"]]
In [173]:
varient_type=[]
fuel_types=[]
_force["Engine Type"]=_force.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [174]:
_force["Engine Type"].unique()
Out[174]:
array(['Fm 2.6 Cr Cd Diesel Engine'], dtype=object)
In [175]:
#Ferrari
#V8 - 90° turbo == V8-90°-turbo
In [176]:
main.loc[main.Model=="Ferrari"][["Engine Type"]]["Engine Type"].unique()
Out[176]:
array(['V8 Turbo', nan, '90-degree V8 twin turbo Engine',
        'V8 - 90° turbo', 'V12 - 65°', 'V8-90°-turbo'], dtype=object)
In [177]:
_ferrari=main.loc[main.Model=="Ferrari"][["Varient","Engine Type"]]
In [178]:
varient_type=[]
fuel_types=[]
_ferrari["Engine Type"]=_ferrari.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [179]:
_ferrari["Engine Type"].unique()
Out[179]:
array(['V8 Turbo Petrol Engine', nan,
        '90 Degree V8 Twin Turbo Petrol Engine',
        'V8 90° Turbo Petrol Engine', 'V12 65° Petrol Engine'],
      dtype=object)
In [180]:
#Honda has no repetition
In [181]:
main.loc[main.Model=="Honda"][["Engine Type"]]["Engine Type"].unique()
Out[181]:
array(['1.2 i-VTEC', 'Water Cooled Inline i-VTEC DOHC with VTC', 'i-DTEC',
        'i-VTEC', 'water cooled inline 4 cylinder',
        'Water Cooled Inline i-DTEC DOHC'], dtype=object)
In [182]:
_honda=main.loc[main.Model=="Honda"][["Varient","Engine Type"]]
In [183]:
varient_type=[]
fuel_types=[]
_honda["Engine Type"]=_honda.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [184]:
_honda["Engine Type"].unique()
Out[184]:
array(['1.2 I Vtec Petrol Engine',
        'Water Cooled Inline I Vtec Dohc With Vtc Petrol Engine',
        'I Dtec Diesel Engine', 'I Vtec Petrol Engine',
        'Water Cooled Inline 4 Cylinder Petrol Engine',
        'Water Cooled Inline I Dtec Dohc Diesel Engine'], dtype=object)
In [185]:
#Isuzu has no repetition
In [186]:
main.loc[main.Model=="Isuzu"][["Engine Type"]]["Engine Type"].unique()
Out[186]:
array(['1.9L Ddi Diesel', 'VGS Turbo Intercooled'], dtype=object)
In [187]:
_isuzu=main.loc[main.Model=="Isuzu"][["Varient","Engine Type"]]
In [188]:
varient_type=[]
fuel_types=[]
_isuzu["Engine Type"]=_isuzu.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [189]:
_isuzu["Engine Type"].unique()
Out[189]:
array(['1.9 Litre Ddi Diesel Engine',
        'Vgs Turbo Intercooled Diesel Engine'], dtype=object)
In [190]:
# Jeep repetition
# 1.4L MultiAir Petrol == 1.4 L MultiAir Petrol
# 2.0 L Multijet Diesel == 2.0L Multijet Diesel
In [191]:
main.loc[main.Model=="Jeep"][["Engine Type"]]["Engine Type"].unique()
Out[191]:
array(['1.4L MultiAir Petrol', '2.0 L Multijet Diesel',
        '2.0L GME T4 DI TC', '2.0L Multijet Diesel', nan,
        '2.0 Multijet II Diesel', '1.4 L MultiAir Petrol'], dtype=object)
In [192]:
_jeep=main.loc[main.Model=="Jeep"][["Varient","Engine Type"]]
In [193]:
varient_type=[]
fuel_types=[]
_jeep["Engine Type"]=_jeep.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [194]:
_jeep["Engine Type"].unique()
Out[194]:
array(['1.4 Litre Multiair Petrol Engine',
        '2.0 Litre Multijet Diesel Engine',
        '2.0 Litre Gme T4 Di Tc Petrol Engine', nan,
        '2.0 Multijet Ii Diesel Engine'], dtype=object)
In [195]:
#Kia repetition
#Smartstream G1.5 == SMARTSTREAM G1.5
In [196]:
main.loc[main.Model=="Kia"][["Engine Type"]]["Engine Type"].unique()
Out[196]:
array(['Smartstream G1.5', 'SMARTSTREAM G1.5', 'Smartstream G1.0 T - GDi',
        'Smartstream G1.4', '1.5 L CRDi VGT', '1.5 L CRDi WGT',
        'SMARTSTREAM G1.2', 'Smartstream G1.4 T-GDi', 'D2.2L VGT Diesel',
        nan], dtype=object)
In [197]:
_kia=main.loc[main.Model=="Kia"][["Varient","Engine Type"]]
In [198]:
varient_type=[]
fuel_types=[]
_kia["Engine Type"]=_kia.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [199]:
_kia["Engine Type"].unique()
Out[199]:
array(['Smartstream G1.5 Petrol Engine',
        'Smartstream G1.0 T Gdi Petrol Engine',
        'Smartstream G1.4 Petrol Engine',
        '1.5 Litre Crdi Vgt Diesel Engine',
        '1.5 Litre Crdi Wgt Diesel Engine',
        'Smartstream G1.2 Petrol Engine',
        'Smartstream G1.4 T Gdi Petrol Engine',
        'D2.2 Litre Vgt Diesel Engine', nan], dtype=object)
In [200]:
# Lamborghini has no repetition
In [201]:
main.loc[main.Model=="Lamborghini"][["Engine Type"]]["Engine Type"].unique()
Out[201]:
array(['5.2 V10 Petrol Engine', 'V10 cylinder 90°,dual injection',
        'V12, 60°, MPI Petrol Engine', 'V8 bi-turbo engine'], dtype=object)
In [202]:
_lamborghini=main.loc[main.Model=="Lamborghini"][["Varient","Engine Type"]]
In [203]:
varient_type=[]
fuel_types=[]
_lamborghini["Engine Type"]=_lamborghini.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [204]:
_lamborghini["Engine Type"].unique()
Out[204]:
array(['5.2 V10 Petrol Engine',
        'V10 Cylinder 90°,Dual Injection Petrol Engine',
        'V12, 60°, Mpi Petrol Engine', 'V8 Bi Turbo Petrol Engine'],
      dtype=object)
In [205]:
# Land_Rover repetition
#3.0 Litre diesel Engine == 3.0 Litre Diesel Engine
In [206]:
main.loc[main.Model=="Land_Rover"][["Engine Type"]]["Engine Type"].unique()
Out[206]:
array(['2.0 Litre P300 Petrol Engine', '3.0 Litre diesel Engine',
        '3.0 Litre Petrol Engine', nan, 'petrol Engine',
        '3.0 Litre Diesel Engine', 'TD4 Diesel Engine', 'Diesel Engine'],
      dtype=object)
In [207]:
_Land_Rover=main.loc[main.Model=="Land_Rover"][["Varient","Engine Type"]]
In [208]:
varient_type=[]
fuel_types=[]
_Land_Rover["Engine Type"]=_Land_Rover.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [209]:
_Land_Rover["Engine Type"].unique()
Out[209]:
array(['2.0 Litre P300 Petrol Engine', '3.0 Litre Diesel Engine',
        '3.0 Litre Petrol Engine', nan, 'Petrol Engine',
        'Td4 Diesel Engine', 'Diesel Engine'], dtype=object)
In [210]:
# Lexus no repetition
In [211]:
main.loc[main.Model=="Lexus"][["Engine Type"]]["Engine Type"].unique()
Out[211]:
array(['2.5-liter, 4-cyl. in-line', '3.5-literV6FourCam',
        '8GR FXS V6 24-valve DOHC with Dual VVT-i', '2 GR FXS',
        '2.5-liter L4 engine', 'V8 32-Valve DOHC Dual VVT'], dtype=object)
In [212]:
_Lexus=main.loc[main.Model=="Lexus"][["Varient","Engine Type"]]
In [213]:
varient_type=[]
fuel_types=[]
_Lexus["Engine Type"]=_Lexus.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [214]:
_Lexus["Engine Type"].unique()
Out[214]:
array(['2.5 Litre, 4 Cyl. In Line Petrol Engine',
        '3.5 Litrev6Fourcam Petrol Engine',
        '8Gr Fxs V6 24 Valve Dohc With Dual Vvt I Petrol Engine',
        '2 Gr Fxs Petrol Engine', '2.5 Litre L4 Petrol Engine',
        'V8 32 Valve Dohc Dual Vvt Petrol Engine'], dtype=object)
In [215]:
# Mahindra no repetition
# mHawk==mhawk
In [216]:
main.loc[main.Model=="Mahindra"][["Engine Type"]]["Engine Type"].unique()
Out[216]:
array(['1.2L Turbo Petrol', 'mStallion', 'mHawk', '2.2L Diesel Engine',
        '1.5L Turbo Diesel', '1.5 Litre mHAWK 75 BSVI Diesel Engine',
        'mHAWK75 BSVI', '2.0L Turbo Petrol', 'mFALCON G80',
        'mStallion 150 TGDi Engine', 'mHAWk100', 'm2DiCR 4 Cyl 2.5L TB',
        '2.2 L Turbo Diesel', 'Electric Engine', 'mHawk 130 Engine',
        'D15 1.5 Litre Diesel Engine', 'mhawk'], dtype=object)
In [217]:
_Mahindra=main.loc[main.Model=="Mahindra"][["Varient","Engine Type"]]
In [218]:
varient_type=[]
fuel_types=[]
_Mahindra["Engine Type"]=_Mahindra.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [219]:
_Mahindra["Engine Type"].unique()
Out[219]:
array(['1.2 Litre Turbo Petrol Engine', 'Mstallion Petrol Engine',
        'Mhawk Diesel Engine', '2.2 Litre Diesel Engine',
        '1.5 Litre Turbo Diesel Engine',
        '1.5 Litre Mhawk 75 Bsvi Diesel Engine',
        'Mhawk75 Bsvi Diesel Engine', '2.0 Litre Turbo Petrol Engine',
        'Mfalcon G80 Petrol Engine', 'Mstallion 150 Tgdi Petrol Engine',
        'Mhawk100 Diesel Engine',
        'M2Dicr 4 Cyl 2.5 Litre Tb Diesel Engine',
        '2.2 Litre Turbo Diesel Engine', 'Electric Engine',
        'Mhawk 130 Diesel Engine', 'D15 1.5 Litre Diesel Engine'],
      dtype=object)
In [220]:
# Aston_Martin no repetition
In [221]:
main.loc[main.Model=="Aston_Martin"][["Engine Type"]]["Engine Type"].unique()
Out[221]:
array([nan, '4.0-litre V8 32 valve Twin Turbo'], dtype=object)
In [222]:
_Aston_Martin=main.loc[main.Model=="Aston_Martin"][["Varient","Engine Type"]]
In [223]:
varient_type=[]
fuel_types=[]
_Aston_Martin["Engine Type"]=_Aston_Martin.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [224]:
_Aston_Martin["Engine Type"].unique()
Out[224]:
array([nan, '4.0 Litre V8 32 Valve Twin Turbo Diesel Engine'],
      dtype=object)
In [225]:
# Maserati no repetition
#check V-Type  Engine
#V Type Petrol Engine == V-Type Petrol Engine
In [226]:
main.loc[main.Model=="Maserati"][["Engine Type"]]["Engine Type"].unique()
Out[226]:
array(['3.0L V6 Diesel Engine', 'V-Type Diesel Engine', 'V-Type  Engine',
        nan, '3.0L V6 Petrol Engine', 'V-Type Petrol Engine',
        'V Type Petrol Engine'], dtype=object)
In [227]:
_Maserati=main.loc[main.Model=="Maserati"][["Varient","Engine Type"]]
In [228]:
varient_type=[]
fuel_types=[]
_Maserati["Engine Type"]=_Maserati.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [229]:
_Maserati["Engine Type"].unique()
Out[229]:
array(['3.0 Litre V6 Diesel Engine', 'V Type Diesel Engine',
        'V Type Petrol Engine', nan, '3.0 Litre V6 Petrol Engine'],
      dtype=object)
In [230]:
#Mercedes-Benz repetition
#4.0-litre V8 Biturbo == 4.0-LITER V8 BITURBO ENGINE == 4.0-litre V8 biturbo engin
#Check OM 656 Engine'
In [231]:
main.loc[main.Model=="Mercedes-Benz"][["Engine Type"]]["Engine Type"].unique()
Out[231]:
array(['V Type Engine', '3.0 L in-line 6 cylinder engine',
        '2.0-litre in line engine', nan, '4.0-litre V8 Biturbo', 'OM654M',
        'V12', 'Diesel Engine', '4.0-litre V8 biturbo engine',
        '4.0-LITER V8 BITURBO ENGINE', 'In-Line 4 Cylinder diesel Engine',
        'AMG 2.0-LITRE 4-CYLINDER ENGINE', '3.0-litre L6 in-line engine',
        'L4 200', 'AMG 35 4MATIC', 'In-Line 4 Cylinder Petrol Engine',
        'In-Line Petrol Engine', 'OM 656 Engine',
        '3.0-litre 6-cylinder In-lineTurbo Engine', 'V8',
        '3.0-litre diesel', '2.2L V 220d', 'Petrol Engine',
        '3.0-litre petrol', 'M256 engine', 'V6 bi-turbo engine',
        'M 264 petrol engine', '4.0-L V8 BITURBO engine',
        '3.0-LITRE V6 BITURBO ENGINE', 'L6 diesel Engine', '2.0L V 220 d',
        'V8 biturbo engine', 'OM 654 Diesel engine',
        '2.0-litre OM654 diesel engine', '4.0-litre petrol', 'L4 200d'],
      dtype=object)
In [232]:
_Mercedes_Benz=main.loc[main.Model=="Mercedes-Benz"][["Varient","Engine Type"]]
In [233]:
varient_type=[]
fuel_types=[]
_Mercedes_Benz["Engine Type"]=_Mercedes_Benz.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [234]:
_Mercedes_Benz["Engine Type"].unique()
Out[234]:
array(['V Type Diesel Engine',
        '3.0 Litre In Litreine 6 Cylinder Diesel Engine',
        '2.0 Litre In Line Petrol Engine', nan,
        '4.0 Litre V8 Biturbo Petrol Engine', 'Om654M Diesel Engine',
        'V12 Petrol Engine', 'Diesel Engine',
        'In Line 4 Cylinder Diesel Engine',
        'Amg 2.0 Litre 4 Cylinder Petrol Engine',
        '3.0 Litre L6 In Line Petrol Engine', 'L4 200 Petrol Engine',
        'Amg 35 4Matic Petrol Engine', 'In Line 4 Cylinder Petrol Engine',
        'In Line Petrol Engine', 'Om 656 Diesel Engine',
        '3.0 Litre 6 Cylinder In Lineturbo Petrol Engine',
        'V8 Petrol Engine', '3.0 Litre Diesel Engine',
        '2.2 Litre V 220D Diesel Engine', 'Petrol Engine',
        '3.0 Litre Petrol Engine', 'M256 Petrol Engine',
        'V6 Bi Turbo Petrol Engine', 'M 264 Petrol Engine',
        '3.0 Litre V6 Biturbo Petrol Engine', 'L6 Diesel Engine',
        '2.0 Litre V 220 D Diesel Engine', 'V8 Biturbo Petrol Engine',
        'Om 654 Diesel Engine', '2.0 Litre Om654 Diesel Engine',
        '4.0 Litre Petrol Engine', 'L4 200D Diesel Engine'], dtype=object)
In [235]:
#MG repetition
#2.0 L Turbocharged Diesel == 2.0L Turbocharged Diesel
#1.5L Turbocharged Intercooled == 1.5 L Turbocharged Intercooled
In [236]:
main.loc[main.Model=="MG"][["Engine Type"]]["Engine Type"].unique()
Out[236]:
array(['2.0 L Turbocharged Diesel', '1.5L Turbocharged Intercooled',
        '1.5 L Turbocharged Intercooled', 'DIESEL 2.0L TURBO',
        'DIESEL 2.0L TWIN TURBO', '2.0L Turbocharged Diesel', '220TURBO',
        'VTi-TECH', nan], dtype=object)
In [237]:
_MG=main.loc[main.Model=="MG"][["Varient","Engine Type"]]
In [238]:
varient_type=[]
fuel_types=[]
_MG["Engine Type"]=_MG.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [239]:
_MG["Engine Type"].unique()
Out[239]:
array(['2.0 Litre Turbocharged Diesel Engine',
        '1.5 Litre Turbocharged Intercooled Petrol Engine',
        'Diesel 2.0 Litre Turbo Diesel Engine',
        'Diesel 2.0 Litre Twin Turbo Diesel Engine',
        '220Turbo Petrol Engine', 'Vti Tech Petrol Engine', nan],
      dtype=object)
In [240]:
#Mini repetition
#Petrol Engine == Petroll Engine
In [241]:
main.loc[main.Model=="Mini"][["Engine Type"]]["Engine Type"].unique()
Out[241]:
array(['Petrol Engine', nan, 'Petroll Engine'], dtype=object)
In [242]:
_Mini=main.loc[main.Model=="Mini"][["Varient","Engine Type"]]
In [243]:
varient_type=[]
fuel_types=[]
_Mini["Engine Type"]=_Mini.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [244]:
_Mini["Engine Type"].unique()
Out[244]:
array(['Petrol Engine', nan], dtype=object)
In [245]:
#Mitsubishi repetition
#Petrol Engine == Petroll Engine
#check Intercooled Turbocharged == Intercooled Turbocharger
In [246]:
main.loc[main.Model=="Mitsubishi"][["Engine Type"]]["Engine Type"].unique()
Out[246]:
array(['In-Line Engine', 'Petrol', nan, 'Intercooled Turbocharged',
        'Intercooled Turbocharger', '4 Cylinder In-Line Petrol',
        'DI-Diesel Engine', '2.4 litre 16 Valve Petrol',
        'Common Rail DI-D Engine'], dtype=object)
In [247]:
_Mitsubishi=main.loc[main.Model=="Mitsubishi"][["Varient","Engine Type"]]
In [248]:
varient_type=[]
fuel_types=[]
_Mitsubishi["Engine Type"]=_Mitsubishi.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [249]:
_Mitsubishi["Engine Type"].unique()
Out[249]:
array(['In Line Diesel Engine', 'Petrol Engine', 'In Line Cng Engine',
        nan, 'Intercooled Turbocharged Petrol Engine',
        'Intercooled Turbocharger Diesel Engine',
        '4 Cylinder In Line Petrol Engine', 'Di Diesel Engine',
        '2.4 Litre 16 Valve Petrol Engine',
        'Common Rail Di D Diesel Engine'], dtype=object)
In [250]:
#Nissan has no repetition
In [251]:
main.loc[main.Model=="Nissan"][["Engine Type"]]["Engine Type"].unique()
Out[251]:
array(['HRA0 1.0 TURBO PETROL', 'B4D 1.0 NA PETROL',
        '1.3 L HR13DDT Turbo Petrol', 'V6 Twin Turbo Petrol Engine',
        '1.5 L HR15 Petrol'], dtype=object)
In [252]:
_Nissan=main.loc[main.Model=="Nissan"][["Varient","Engine Type"]]
In [253]:
varient_type=[]
fuel_types=[]
_Nissan["Engine Type"]=_Nissan.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [254]:
_Nissan["Engine Type"].unique()
Out[254]:
array(['Hra0 1.0 Turbo Petrol Engine', 'B4D 1.0 Na Petrol Engine',
        '1.3 Litre Hr13Ddt Turbo Petrol Engine',
        'V6 Twin Turbo Petrol Engine', '1.5 Litre Hr15 Petrol Engine'],
      dtype=object)
In [255]:
#Porsche has no repetition
#Twin Turbo V8 Engine == Twin-turbo V8
In [256]:
main.loc[main.Model=="Porsche"][["Engine Type"]]["Engine Type"].unique()
Out[256]:
array(['Petrol Engine', nan, '4.0 Litre V6 Turbo Engine',
        '2.0L Mid-engine', 'Twin Turbo V8 Engine', 'V6 Petrol Engine',
        'naturally aspirated boxer engine', 'Twin-Turbocharged Engine',
        '4.0L Petrol engine', '3.0 Litre V6 Engine', 'Twin-turbo V8',
        'V6 Diesel Engine', '4.0 V8 twin-turbo', 'V8 Petrol Engine'],
      dtype=object)
In [257]:
_Porsche=main.loc[main.Model=="Porsche"][["Varient","Engine Type"]]
In [258]:
varient_type=[]
fuel_types=[]
_Porsche["Engine Type"]=_Porsche.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [259]:
_Porsche["Engine Type"].unique()
Out[259]:
array(['Petrol Engine', nan, '4.0 Litre V6 Turbo Petrol Engine',
        '2.0 Litre Mid Engine', 'Twin Turbo V8 Petrol Engine',
        'V6 Petrol Engine', 'Naturally Aspirated Boxer Petrol Engine',
        'Twin Turbocharged Petrol Engine', '4.0 Litre Petrol Engine',
        '3.0 Litre V6 Petrol Engine', 'V6 Diesel Engine',
        '4.0 V8 Twin Turbo Petrol Engine', 'V8 Petrol Engine'],
      dtype=object)
In [260]:
#Renault has no repetition
In [261]:
main.loc[main.Model=="Renault"][["Engine Type"]]["Engine Type"].unique()
Out[261]:
array(['1.0L ENERGY', nan, '0.8 SCe', '1.0 SCe', '1.0L TURBO'],
      dtype=object)
In [262]:
_Renault=main.loc[main.Model=="Renault"][["Varient","Engine Type"]]
In [263]:
varient_type=[]
fuel_types=[]
_Renault["Engine Type"]=_Renault.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [264]:
_Renault["Engine Type"].unique()
Out[264]:
array(['1.0 Litre Energy Petrol Engine', nan, '0.8 Sce Petrol Engine',
        '1.0 Sce Petrol Engine', '1.0 Litre Turbo Petrol Engine'],
      dtype=object)
In [265]:
#Rolls-Royce has no repetition
In [266]:
main.loc[main.Model=="Rolls-Royce"][["Engine Type"]]["Engine Type"].unique()
Out[266]:
array(['V12 Petrol Engine', nan, 'V Type Engine', '6.75-litre V12 engine'],
      dtype=object)
In [267]:
_Rolls_Royce=main.loc[main.Model=="Rolls-Royce"][["Varient","Engine Type"]]
In [268]:
varient_type=[]
fuel_types=[]
_Rolls_Royce["Engine Type"]=_Rolls_Royce.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [269]:
_Rolls_Royce["Engine Type"].unique()
Out[269]:
array(['V12 Petrol Engine', nan, 'V Type Diesel Engine',
        '6.75 Litre V12 Petrol Engine'], dtype=object)
In [270]:
#Skoda has no repetition
#1.5 L TSI Petrol==1.5 TSI Petrol Engine==1.5 TSI Petrol Engine
#2.0 L Turbocharged Petrol Engine==2.0L Turbocharged Petrol
#1.0 L TSI Petrol==1.0 TSI Petrol Engine
In [271]:
main.loc[main.Model=="Skoda"][["Engine Type"]]["Engine Type"].unique()
Out[271]:
array(['1.5 L TSI Petrol', '2.0 L Turbocharged Petrol Engine',
        '1.0 L TSI Petrol', '1.0 TSI Petrol Engine', '1.5 TSI Petrol',
        '1.5 TSI Petrol Engine', '1.0 TSI Petrol',
        '2.0 L TSI Petrol Engine', '2.0L Turbocharged Petrol'],
      dtype=object)
In [272]:
_Skoda=main.loc[main.Model=="Skoda"][["Varient","Engine Type"]]
In [273]:
varient_type=[]
fuel_types=[]
_Skoda["Engine Type"]=_Skoda.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [274]:
_Skoda["Engine Type"].unique()
Out[274]:
array(['1.5 Litre Tsi Petrol Engine',
        '2.0 Litre Turbocharged Petrol Engine',
        '1.0 Litre Tsi Petrol Engine', '2.0 Litre Tsi Petrol Engine'],
      dtype=object)
In [275]:
#Tata repetition
#1.2 L Revotron == 1.2 l Revotron Engine == 1.2 l Revotron == 1.2L Revotron Engine
#1.5l Turbocharged Revotorq Engine == 1.5 l Turbocharged Revotorq
In [276]:
main.loc[main.Model=="Tata"][["Engine Type"]]["Engine Type"].unique()
Out[276]:
array(['Kryotec 2.0 L Turbocharged Engine',
        '1.2l Turbocharged Revotron Engine', '1.2 L Revotron',
        '1.2 l Revotron Engine', 'Revotron 1.2 l BS6 Engine', nan,
        '1.2 l Revotron', 'TATA 4SP CR TCIC',
        '1.5l Turbocharged Revotorq Engine', '1.5 l Turbocharged Revotorq',
        '1.2L Revotron Engine', '1.2 l i-Turbo', '1.2 L i-CNG'],
      dtype=object)
In [277]:
_Tata=main.loc[main.Model=="Tata"][["Varient","Engine Type"]]
In [278]:
varient_type=[]
fuel_types=[]
_Tata["Engine Type"]=_Tata.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [279]:
_Tata["Engine Type"].unique()
Out[279]:
array(['Kryotec 2.0 Litre Turbocharged Diesel Engine',
        '1.2 Litre Turbocharged Revotron Petrol Engine',
        '1.2 Litre Revotron Petrol Engine',
        'Revotron 1.2 Litre Bs6 Petrol Engine', nan,
        'Tata 4Sp Cr Tcic Diesel Engine',
        '1.5 Litre Turbocharged Revotorq Diesel Engine',
        '1.2 Litre Revotron Cng Engine', '1.2 Litre I Turbo Petrol Engine',
        '1.2 Litre I Cng Engine'], dtype=object)
In [280]:
#Toyota repetition
#2.8 L Diesel engine == 2.8 L Diesel Engine
#2.7L Dual VVT == 2.7 L Dual VVT
In [281]:
main.loc[main.Model=="Toyota"][["Engine Type"]]["Engine Type"].unique()
Out[281]:
array(['2.8 L Diesel engine', '2.8 L Diesel Engine', '2.7L Dual VVT',
        '1.2 L Petrol Engine', nan, '2.7L Petrol Engine', '2.7 L Dual VVT',
        '2.5L Dynamic Force Engine', 'Gasoline Hybrid and E-FOUR'],
      dtype=object)
In [282]:
_Toyota=main.loc[main.Model=="Toyota"][["Varient","Engine Type"]]
In [283]:
varient_type=[]
fuel_types=[]
_Toyota["Engine Type"]=_Toyota.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [284]:
_Toyota["Engine Type"].unique()
Out[284]:
array(['2.8 Litre Diesel Engine', '2.7 Litre Dual Vvt Petrol Engine',
        '1.2 Litre Petrol Engine', nan, '2.7 Litre Petrol Engine',
        '2.5 Litre Dynamic Force Petrol Engine',
        'Gasoline Hybrid And E Four Petrol Engine'], dtype=object)
In [285]:
#Volkswagen has no repetition
In [286]:
main.loc[main.Model=="Volkswagen"][["Engine Type"]]["Engine Type"].unique()
Out[286]:
array(['1.0L TSI', 'TSI Petrol Engine', '1.5L TSI EVO with ACT',
        '2.0 TSI'], dtype=object)
In [287]:
_Volkswagen=main.loc[main.Model=="Volkswagen"][["Varient","Engine Type"]]
In [288]:
varient_type=[]
fuel_types=[]
_Volkswagen["Engine Type"]=_Volkswagen.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [289]:
_Volkswagen["Engine Type"].unique()
Out[289]:
array(['1.0 Litre Tsi Petrol Engine', 'Tsi Petrol Engine',
        '1.5 Litre Tsi Evo With Act Petrol Engine',
        '2.0 Tsi Petrol Engine'], dtype=object)
In [290]:
#Volvo has no repetition
In [291]:
main.loc[main.Model=="Volvo"][["Engine Type"]]["Engine Type"].unique()
Out[291]:
array(['Twin Turbo & Super Charge Petrol Engine', nan,
        'Turbo Petrol Engine'], dtype=object)
In [292]:
_Volvo=main.loc[main.Model=="Volvo"][["Varient","Engine Type"]]
In [293]:
varient_type=[]
fuel_types=[]
_Volvo["Engine Type"]=_Volvo.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [294]:
_Volvo["Engine Type"].unique()
Out[294]:
array(['Twin Turbo & Super Charge Petrol Engine', nan,
        'Turbo Petrol Engine'], dtype=object)
In [295]:
#Apply the preprocess_Engine_Type function to make transformation
varient_type=[]
fuel_types=[]
main["Engine Type"]=main.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [296]:
main[["Model","Varient","Engine Type","Fuel Type"]].loc[main["Fuel Type"]=="CNG"]
Out[296]:
Model Varient Engine Type Fuel Type
10 Maruti Alto 800 LXI Opt S-CNG F8D Cng Engine CNG
15 Maruti Swift Dzire Tour S CNG (O) NaN CNG
21 Maruti Ertiga ZXI CNG K15C Cng Engine CNG
45 Maruti Wagon R VXI CNG K10C Cng Engine CNG
48 Maruti Super Carry STD CNG Multi Point Fuel Injection G12B Bs—Vi Cng Engine CNG
... ... ... ... ...
154723 Tata Tigor XZ CNG 1.2 Litre Revotron Cng Engine CNG
154733 Tata Tigor XZ Plus CNG 1.2 Litre Revotron Cng Engine CNG
154736 Tata Tigor XM CNG 1.2 Litre Revotron Cng Engine CNG
154751 Tata Tiago XZ Plus CNG 1.2 Litre I Cng Engine CNG
154772 Tata Tigor XZ Plus CNG 1.2 Litre Revotron Cng Engine CNG

3888 rows × 4 columns

In [297]:
main.columns[28]
Out[297]:
'Displacement (cc)'
In [298]:
main["Displacement (cc)"]
Out[298]:
0         1197.0
1         1196.0
2         1462.0
3         1197.0
4          998.0
            ...  
160026       NaN
160027    1969.0
160028    1969.0
160029    1969.0
160030    1969.0
Name: Displacement (cc), Length: 160031, dtype: float64
In [299]:
main.columns[29]
Out[299]:
'Max Power'
In [300]:
main["Max Power"]
Out[300]:
0          88.50bhp@6000rpm
1          72.41bhp@6000rpm
2         103.25bhp@6000rpm
3          88.50bhp@6000rpm
4          65.71bhp@5500rpm
                ...        
160026            402.30bhp
160027                  NaN
160028            246.58Bhp
160029                  NaN
160030                  NaN
Name: Max Power, Length: 160031, dtype: object
In [301]:
#Max power is same as the Max Power (bhp@rpm) so dropit,actually this information is present in both section 
main.drop(["Max Power"],axis=1,inplace=True)
In [302]:
main.columns[29]
Out[302]:
'Max Torque'
In [303]:
main["Max Torque"]
Out[303]:
0         113Nm@4400rpm
1          98Nm@3000rpm
2         138Nm@4400rpm
3         113Nm@4400rpm
4          89Nm@3500rpm
              ...      
160026            660Nm
160027              NaN
160028            350Nm
160029              NaN
160030              NaN
Name: Max Torque, Length: 160031, dtype: object
In [304]:
main.drop(["Max Torque"],axis=1,inplace=True)
In [305]:
main.columns[29]
Out[305]:
'Valves Per Cylinder'
In [306]:
main["Valves Per Cylinder"]
Out[306]:
0         4.0
1         4.0
2         4.0
3         4.0
4         4.0
          ... 
160026    NaN
160027    4.0
160028    4.0
160029    4.0
160030    4.0
Name: Valves Per Cylinder, Length: 160031, dtype: float64
In [307]:
main.columns[30]
Out[307]:
'Gear Box'
In [308]:
main["Gear Box"]
Out[308]:
0                           5 Speed
1                           5 Speed
2                           5 Speed
3                           5 Speed
4                           5 Speed
                    ...            
160026    single speed transmission
160027                          NaN
160028                      8-speed
160029                          NaN
160030                          NaN
Name: Gear Box, Length: 160031, dtype: object
In [309]:
main["Gear Box"].unique()
Out[309]:
array(['5 Speed', '5-Speed', '4 Speed', '6-Speed', 'AGS', nan, '6 Speed',
        '6 Speed Automatic', '10 speed', '10 Speed', '7-speed Stronic',
        '7-speed DCT', '7 Speed', '8 Speed',
        '7 speed S-tronic transmission', '8-speed tiptronic/automatic',
        '8-speed tiptronic', '8 speed', '8-Speed Steptronic', '8-Speed',
        '7-Speed', '7-Speed Steptronic',
        '8-Speed Steptronic Sport Automatic Transmission', 'Single speed',
        '8-Speed Automatic Transmission', '8-speed Steptronic Automatic',
        '8-speed M-Steptronic', '8-speed DCT', '8-speed', 'CVT',
        '6-speed IVT', '7-Speed DCT', 'iMT', '7 Speed DCT', '6-speed',
        '6 Speed IMT', '6 Speed IVT', 'IVT', '6-speed iMT', '9-Speed',
        '9 Speed', '6-Speed iMT', '7 Speed dual clutch transmission',
        '7 Speed LDF DCT', '8', 'E-CVT', '6-speed AutoSHIFT',
        'Mercedes Benz 7 Speed Automatic', 'Fully Automatic',
        'AMG Speedshift 9G TCT Automatic', '9-speed automatic',
        '9G-TRONIC', 'AMG 8 Speed DCT', 'Single-speed transmission',
        'MCT 9-Speed', '9G TRONIC', '9-speed', 'AMG MCT 9G Sport',
        'AMG 7-SPEED DCT', 'AMG TCT 9G', 'AMG SPEEDSHIFT DCT 8G',
        'SPEEDSHIFT TCT 9G', '9G-TRONIC automatic',
        'SPEEDSHIFT TCT 9-speed', '9 speed Tronic',
        '7-Speed DCT dual-clutch', '8G-DCT', 'AMG SPEEDSHIFT MCT 9G',
        '7 Speed 9G-Tronic automatic', '7G-DCT', '8-Speed DCT',
        '6-speed CVT', '8 Speed CVT', '8-speed Steptronic Transmission',
        '7-Speed DCT Steptronic', 'Six Speed Manual with Paddle Shifter',
        '6', '7-speed PDK', '2-speed transmission', '8-speed Tiptronic S',
        '8 SpeedPDK', '8-speed Porsche Doppelkupplung',
        '8-speed Tiptronic S with shift-by-wire', '7-Speed DSG', '7-speed',
        '7-speed DSG', '6-Speed DCT', '6 Speed iMT',
        '6 Speed with Sequential Shift', '8Speed',
        'single speed transmission'], dtype=object)
In [310]:
def preprocess_gear_box(text):
    list_of_words_to_remove=[" Automatic"," Transmission","/Automatic"]
    Activation=False
    text=str(text)
    if text=="nan":
        return np.nan
    text=text.title()
    if "G" in text:
        try:
            int(text[text.find("G")-1])
            get_index=text.find("G")
            text=text.replace("G","")
            Activation=True
        except:
            pass
    if "-" in text:
        try:
            pre=int(text[text.find("-")-1])
            next_=str(text[text.find("-")+1])
            text=text.replace("-"," ")
        except:
            text=text.replace("-","")
    text=text.title()
    for i in list_of_words_to_remove:
        if i in text:
            text=text.replace("{}".format(i),"")
    if len(text)==1:
        try:
            checking=int(text)
            text=text+" Speed"
        except:
            pass
    try:
        int(text[0])
        if text[1]!=" ":
            text=text[:1]+"-"+text[1:]
    except:
        pass
    if "-" in text:
        text=text.replace("-"," ")
    if Activation==True:
        text=text[:get_index]+"G"+text[get_index:]
    return text
In [311]:
#Maruti
In [312]:
main.loc[main["Model"]=="Maruti"][["Gear Box"]]["Gear Box"].unique()
Out[312]:
array(['5 Speed', '5-Speed', '4 Speed', '6-Speed', 'AGS', nan],
      dtype=object)
In [313]:
com=main.loc[main["Model"]=="Maruti"]["Gear Box"].apply(preprocess_gear_box)
In [314]:
com.unique()
Out[314]:
array(['5 Speed', '4 Speed', '6 Speed', 'Ags', 'Nan'], dtype=object)
In [315]:
#Ford
In [316]:
main.loc[main["Model"]=="Ford"][["Gear Box"]]["Gear Box"].unique()
Out[316]:
array(['5 Speed', '6 Speed', nan, '5-Speed', '6 Speed Automatic',
        '10 speed', '10 Speed'], dtype=object)
In [317]:
com=main.loc[main["Model"]=="Ford"]["Gear Box"].apply(preprocess_gear_box)
In [318]:
com.unique()
Out[318]:
array(['5 Speed', '6 Speed', 'Nan', '1 0 Speed'], dtype=object)
In [319]:
#Audi
In [320]:
main.loc[main["Model"]=="Audi"][["Gear Box"]]["Gear Box"].unique()
Out[320]:
array([nan, '7-speed Stronic', '7-speed DCT', '7 Speed', '8 Speed',
        '7 speed S-tronic transmission', '8-speed tiptronic/automatic',
        '8-speed tiptronic'], dtype=object)
In [321]:
com=main.loc[main["Model"]=="Audi"]["Gear Box"].apply(preprocess_gear_box)
In [322]:
com.unique()
Out[322]:
array(['Nan', '7 Speed Stronic', '7 Speed Dct', '7 Speed', '8 Speed',
        '8 Speed Tiptronic'], dtype=object)
In [323]:
#Bentley
In [324]:
main.loc[main["Model"]=="Bentley"][["Gear Box"]]["Gear Box"].unique()
Out[324]:
array(['8 Speed', '8 speed'], dtype=object)
In [325]:
com=main.loc[main["Model"]=="Bentley"]["Gear Box"].apply(preprocess_gear_box)
In [326]:
com.unique()
Out[326]:
array(['8 Speed'], dtype=object)
In [327]:
#Force
In [328]:
main.loc[main["Model"]=="Force"][["Gear Box"]]["Gear Box"].unique()
Out[328]:
array(['5 Speed'], dtype=object)
In [329]:
com=main.loc[main["Model"]=="Force"]["Gear Box"].apply(preprocess_gear_box)
In [330]:
com.unique()
Out[330]:
array(['5 Speed'], dtype=object)
In [331]:
#Ferrari
In [332]:
main.loc[main["Model"]=="Ferrari"][["Gear Box"]]["Gear Box"].unique()
Out[332]:
array(['7 Speed', '8-speed DCT', '8 speed', nan, '8-speed'], dtype=object)
In [333]:
com=main.loc[main["Model"]=="Ferrari"]["Gear Box"].apply(preprocess_gear_box)
In [334]:
com.unique()
Out[334]:
array(['7 Speed', '8 Speed Dct', '8 Speed', 'Nan'], dtype=object)
In [335]:
#Honda
In [336]:
main.loc[main["Model"]=="Honda"][["Gear Box"]]["Gear Box"].unique()
Out[336]:
array(['7 Speed', '6 Speed', 'CVT', '5 Speed', nan, '5-Speed'],
      dtype=object)
In [337]:
com=main.loc[main["Model"]=="Honda"]["Gear Box"].apply(preprocess_gear_box)
In [338]:
com.unique()
Out[338]:
array(['7 Speed', '6 Speed', 'Cvt', '5 Speed', 'Nan'], dtype=object)
In [339]:
#Hyundai
In [340]:
main.loc[main["Model"]=="Hyundai"][["Gear Box"]]["Gear Box"].unique()
Out[340]:
array(['6-speed IVT', '5-Speed', '5 Speed', '7-Speed DCT', '6 Speed',
        'iMT', '7 Speed DCT', '6-speed', '6-Speed', '6 Speed IMT',
        '6 Speed IVT', 'IVT', '7-speed DCT', '8-Speed', '7 Speed',
        '6-speed iMT'], dtype=object)
In [341]:
com=main.loc[main["Model"]=="Hyundai"]["Gear Box"].apply(preprocess_gear_box)
In [342]:
com.unique()
Out[342]:
array(['6 Speed Ivt', '5 Speed', '7 Speed Dct', '6 Speed', 'Imt',
        '6 Speed Imt', 'Ivt', '8 Speed', '7 Speed'], dtype=object)
In [343]:
#Isuzu
In [344]:
main.loc[main["Model"]=="Isuzu"][["Gear Box"]]["Gear Box"].unique()
Out[344]:
array(['6-Speed', '6 Speed'], dtype=object)
In [345]:
com=main.loc[main["Model"]=="Isuzu"]["Gear Box"].apply(preprocess_gear_box)
In [346]:
com.unique()
Out[346]:
array(['6 Speed'], dtype=object)
In [347]:
#Jeep
In [348]:
main.loc[main["Model"]=="Jeep"][["Gear Box"]]["Gear Box"].unique()
Out[348]:
array(['7 Speed DCT', '9-Speed', '7 Speed', '6-Speed', '8 Speed',
        '6 Speed', nan, '9 Speed', '7-Speed DCT'], dtype=object)
In [349]:
com=main.loc[main["Model"]=="Jeep"]["Gear Box"].apply(preprocess_gear_box)
In [350]:
com.unique()
Out[350]:
array(['7 Speed Dct', '9 Speed', '7 Speed', '6 Speed', '8 Speed', 'Nan'],
      dtype=object)
In [351]:
#Kia
In [352]:
main.loc[main["Model"]=="Kia"][["Gear Box"]]["Gear Box"].unique()
Out[352]:
array(['6-Speed', '7-Speed DCT', '6-Speed iMT', '5-Speed', nan, '8-Speed',
        'IVT'], dtype=object)
In [353]:
com=main.loc[main["Model"]=="Kia"]["Gear Box"].apply(preprocess_gear_box)
In [354]:
com.unique()
Out[354]:
array(['6 Speed', '7 Speed Dct', '6 Speed Imt', '5 Speed', 'Nan',
        '8 Speed', 'Ivt'], dtype=object)
In [355]:
#Lamborghini
In [356]:
main.loc[main["Model"]=="Lamborghini"][["Gear Box"]]["Gear Box"].unique()
Out[356]:
array(['7 Speed dual clutch transmission', '7 Speed LDF DCT', '7 Speed',
        '8'], dtype=object)
In [357]:
com=main.loc[main["Model"]=="Lamborghini"]["Gear Box"].apply(preprocess_gear_box)
In [358]:
com.unique()
Out[358]:
array(['7 Speed Dual Clutch', '7 Speed Ldf Dct', '7 Speed', '8 Speed'],
      dtype=object)
In [359]:
#Land_Rover
In [360]:
main.loc[main["Model"]=="Land_Rover"][["Gear Box"]]["Gear Box"].unique()
Out[360]:
array(['8-Speed Automatic Transmission', nan, '8 Speed', '9 Speed'],
      dtype=object)
In [361]:
com=main.loc[main["Model"]=="Land_Rover"]["Gear Box"].apply(preprocess_gear_box)
In [362]:
com.unique()
Out[362]:
array(['8 Speed', 'Nan', '9 Speed'], dtype=object)
In [363]:
#Lexus
In [364]:
main.loc[main["Model"]=="Lexus"][["Gear Box"]]["Gear Box"].unique()
Out[364]:
array([nan, '10 Speed', '10 speed', '8 Speed', 'E-CVT'], dtype=object)
In [365]:
com=main.loc[main["Model"]=="Lexus"]["Gear Box"].apply(preprocess_gear_box)
In [366]:
com.unique()
Out[366]:
array(['Nan', '1 0 Speed', '8 Speed', 'Ecvt'], dtype=object)
In [367]:
#Mahindra
In [368]:
main.loc[main["Model"]=="Mahindra"][["Gear Box"]]["Gear Box"].unique()
Out[368]:
array(['6-speed AutoSHIFT', '6-Speed', '6 Speed',
        'Mercedes Benz 7 Speed Automatic', nan, '5 Speed', '5-Speed',
        '6-speed', 'Fully Automatic'], dtype=object)
In [369]:
com=main.loc[main["Model"]=="Mahindra"]["Gear Box"].apply(preprocess_gear_box)
In [370]:
com.unique()
Out[370]:
array(['6 Speed Autoshift', '6 Speed', 'Mercedes Benz 7 Speed', 'Nan',
        '5 Speed', 'Fully'], dtype=object)
In [371]:
#Aston_Martin
In [372]:
main.loc[main["Model"]=="Aston_Martin"][["Gear Box"]]["Gear Box"].unique()
Out[372]:
array(['AMG Speedshift 9G TCT Automatic', '9-speed automatic'],
      dtype=object)
In [373]:
com=main.loc[main["Model"]=="Aston_Martin"]["Gear Box"].apply(preprocess_gear_box)
In [374]:
com.unique()
Out[374]:
array(['Amg Speedshift 9G Tct', '9 Speed'], dtype=object)
In [375]:
#Maserati
In [376]:
main.loc[main["Model"]=="Maserati"][["Gear Box"]]["Gear Box"].unique()
Out[376]:
array(['8 Speed', nan, '6 Speed'], dtype=object)
In [377]:
com=main.loc[main["Model"]=="Maserati"]["Gear Box"].apply(preprocess_gear_box)
In [378]:
com.unique()
Out[378]:
array(['8 Speed', 'Nan', '6 Speed'], dtype=object)
In [379]:
#Mercedes-Benz
In [380]:
main.loc[main["Model"]=="Mercedes-Benz"][["Gear Box"]]["Gear Box"].unique()
Out[380]:
array(['9G-TRONIC', '9-Speed', 'AMG 8 Speed DCT',
        'Single-speed transmission', 'MCT 9-Speed', '9G TRONIC', '9-speed',
        'AMG MCT 9G Sport', nan, '9 Speed', 'AMG 7-SPEED DCT',
        'AMG TCT 9G', '7-Speed DCT', 'AMG SPEEDSHIFT DCT 8G',
        'AMG Speedshift 9G TCT Automatic', 'SPEEDSHIFT TCT 9G',
        '9G-TRONIC automatic', '7 Speed', 'SPEEDSHIFT TCT 9-speed',
        '9 speed Tronic', '7-Speed DCT dual-clutch', '8G-DCT',
        'AMG SPEEDSHIFT MCT 9G', '7 Speed 9G-Tronic automatic', '7G-DCT',
        '8-Speed DCT'], dtype=object)
In [381]:
com=main.loc[main["Model"]=="Mercedes-Benz"]["Gear Box"].apply(preprocess_gear_box)
In [382]:
com.unique()
Out[382]:
array(['9G Tronic', '9 Speed', 'Amg 8 Speed Dct', 'Singlespeed',
        'Mct 9 Speed', 'Amg Mct 9G Sport', 'Nan', 'Amg 7 Speed Dct',
        'Amg Tct 9G', '7 Speed Dct', 'Amg Speedshift Dct 8G',
        'Amg Speedshift 9G Tct', 'Speedshift Tct 9G', '7 Speed',
        'Speedshift Tct 9 Speed', '9 Speed Tronic',
        '7 Speed Dct Dual Clutch', '8G Dct', 'Amg Speedshift Mct 9G',
        '7 Speed 9G Tronic', '7G Dct', '8 Speed Dct'], dtype=object)
In [383]:
#MG
In [384]:
main.loc[main["Model"]=="MG"][["Gear Box"]]["Gear Box"].unique()
Out[384]:
array(['6-Speed', '6-speed', '6-speed CVT', '8-Speed', '8 Speed',
        '6 Speed', '5 Speed', '8 Speed CVT', nan, '8-speed'], dtype=object)
In [385]:
com=main.loc[main["Model"]=="MG"]["Gear Box"].apply(preprocess_gear_box)
In [386]:
com.unique()
Out[386]:
array(['6 Speed', '6 Speed Cvt', '8 Speed', '5 Speed', '8 Speed Cvt',
        'Nan'], dtype=object)
In [387]:
#Mini
In [388]:
main.loc[main["Model"]=="Mini"][["Gear Box"]]["Gear Box"].unique()
Out[388]:
array(['7 Speed', '8-speed Steptronic Transmission', nan,
        '7-Speed DCT Steptronic'], dtype=object)
In [389]:
com=main.loc[main["Model"]=="Mini"]["Gear Box"].apply(preprocess_gear_box)
In [390]:
com.unique()
Out[390]:
array(['7 Speed', '8 Speed Steptronic', 'Nan', '7 Speed Dct Steptronic'],
      dtype=object)
In [391]:
#Mitsubishi
In [392]:
main.loc[main["Model"]=="Mitsubishi"][["Gear Box"]]["Gear Box"].unique()
Out[392]:
array(['5 Speed', 'Six Speed Manual with Paddle Shifter', nan, '6 Speed',
        '6'], dtype=object)
In [393]:
com=main.loc[main["Model"]=="Mitsubishi"]["Gear Box"].apply(preprocess_gear_box)
In [394]:
com.unique()
Out[394]:
array(['5 Speed', 'Six Speed Manual With Paddle Shifter', 'Nan',
        '6 Speed'], dtype=object)
In [395]:
#Nissan
In [396]:
main.loc[main["Model"]=="Nissan"][["Gear Box"]]["Gear Box"].unique()
Out[396]:
array(['5 Speed', 'CVT', '6-Speed', '6 Speed', '5-Speed'], dtype=object)
In [397]:
com=main.loc[main["Model"]=="Nissan"]["Gear Box"].apply(preprocess_gear_box)
In [398]:
com.unique()
Out[398]:
array(['5 Speed', 'Cvt', '6 Speed'], dtype=object)
In [399]:
#Porsche
In [400]:
main.loc[main["Model"]=="Porsche"][["Gear Box"]]["Gear Box"].unique()
Out[400]:
array([nan, '8-Speed', '7-speed PDK', '2-speed transmission',
        '8-speed Tiptronic S', '6 Speed', '8 SpeedPDK', '8 Speed',
        '7 Speed', '8-speed Porsche Doppelkupplung',
        '8-speed Tiptronic S with shift-by-wire'], dtype=object)
In [401]:
com=main.loc[main["Model"]=="Porsche"]["Gear Box"].apply(preprocess_gear_box)
In [402]:
com.unique()
Out[402]:
array(['Nan', '8 Speed', '7 Speed Pdk', '2 Speed', '8 Speed Tiptronic S',
        '6 Speed', '8 Speedpdk', '7 Speed',
        '8 Speed Porsche Doppelkupplung',
        '8 Speed Tiptronic S With Shift By Wire'], dtype=object)
In [403]:
#Renault
In [404]:
main.loc[main["Model"]=="Renault"][["Gear Box"]]["Gear Box"].unique()
Out[404]:
array(['5-Speed', '5 Speed', 'CVT'], dtype=object)
In [405]:
com=main.loc[main["Model"]=="Renault"]["Gear Box"].apply(preprocess_gear_box)
In [406]:
com.unique()
Out[406]:
array(['5 Speed', 'Cvt'], dtype=object)
In [407]:
#Rolls-Royce
In [408]:
main.loc[main["Model"]=="Rolls-Royce"][["Gear Box"]]["Gear Box"].unique()
Out[408]:
array(['8 Speed', nan], dtype=object)
In [409]:
com=main.loc[main["Model"]=="Rolls-Royce"]["Gear Box"].apply(preprocess_gear_box)
In [410]:
com.unique()
Out[410]:
array(['8 Speed', 'Nan'], dtype=object)
In [411]:
#Skoda
In [412]:
main.loc[main["Model"]=="Skoda"][["Gear Box"]]["Gear Box"].unique()
Out[412]:
array(['6-Speed', '7-Speed DSG', '6-speed', '7-Speed', '7-speed',
        '7-speed DSG'], dtype=object)
In [413]:
com=main.loc[main["Model"]=="Skoda"]["Gear Box"].apply(preprocess_gear_box)
In [414]:
com.unique()
Out[414]:
array(['6 Speed', '7 Speed Dsg', '7 Speed'], dtype=object)
In [415]:
#Tata
In [416]:
main.loc[main["Model"]=="Tata"][["Gear Box"]]["Gear Box"].unique()
Out[416]:
array(['6-Speed', '5-Speed', '5 Speed', '6-Speed DCT', 'Single speed',
        nan], dtype=object)
In [417]:
com=main.loc[main["Model"]=="Tata"]["Gear Box"].apply(preprocess_gear_box)
In [418]:
com.unique()
Out[418]:
array(['6 Speed', '5 Speed', '6 Speed Dct', 'Single Speed', 'Nan'],
      dtype=object)
In [419]:
#Toyota
In [420]:
main.loc[main["Model"]=="Toyota"][["Gear Box"]]["Gear Box"].unique()
Out[420]:
array(['6 Speed iMT', '6-Speed iMT', '6 Speed with Sequential Shift',
        '6 Speed', '5-Speed', nan, '6-Speed', '5 Speed'], dtype=object)
In [421]:
com=main.loc[main["Model"]=="Toyota"]["Gear Box"].apply(preprocess_gear_box)
In [422]:
com.unique()
Out[422]:
array(['6 Speed Imt', '6 Speed With Sequential Shift', '6 Speed',
        '5 Speed', 'Nan'], dtype=object)
In [423]:
#Volkswagen
In [424]:
main.loc[main["Model"]=="Volkswagen"][["Gear Box"]]["Gear Box"].unique()
Out[424]:
array(['6-Speed', '6 Speed', '7-Speed DSG', '7 Speed DCT'], dtype=object)
In [425]:
com=main.loc[main["Model"]=="Volkswagen"]["Gear Box"].apply(preprocess_gear_box)
In [426]:
com.unique()
Out[426]:
array(['6 Speed', '7 Speed Dsg', '7 Speed Dct'], dtype=object)
In [427]:
#Volvo
In [428]:
main.loc[main["Model"]=="Volvo"][["Gear Box"]]["Gear Box"].unique()
Out[428]:
array(['8 Speed', '8-speed', nan, '8Speed', 'single speed transmission'],
      dtype=object)
In [429]:
com=main.loc[main["Model"]=="Volvo"]["Gear Box"].apply(preprocess_gear_box)
In [430]:
com.unique()
Out[430]:
array(['8 Speed', 'Nan', 'Single Speed'], dtype=object)
In [431]:
#Apply this function to all brands
main["Gear Box"]=main["Gear Box"].apply(preprocess_gear_box)
In [432]:
main["Gear Box"].unique()
Out[432]:
array(['5 Speed', '4 Speed', '6 Speed', 'Ags', 'Nan', '1 0 Speed',
        '7 Speed Stronic', '7 Speed Dct', '7 Speed', '8 Speed',
        '8 Speed Tiptronic', '8 Speed Steptronic', '7 Speed Steptronic',
        '8 Speed Steptronic Sport', 'Single Speed', '8 Speed M Steptronic',
        '8 Speed Dct', 'Cvt', '6 Speed Ivt', 'Imt', '6 Speed Imt', 'Ivt',
        '9 Speed', '7 Speed Dual Clutch', '7 Speed Ldf Dct', 'Ecvt',
        '6 Speed Autoshift', 'Mercedes Benz 7 Speed', 'Fully',
        'Amg Speedshift 9G Tct', '9G Tronic', 'Amg 8 Speed Dct',
        'Singlespeed', 'Mct 9 Speed', 'Amg Mct 9G Sport',
        'Amg 7 Speed Dct', 'Amg Tct 9G', 'Amg Speedshift Dct 8G',
        'Speedshift Tct 9G', 'Speedshift Tct 9 Speed', '9 Speed Tronic',
        '7 Speed Dct Dual Clutch', '8G Dct', 'Amg Speedshift Mct 9G',
        '7 Speed 9G Tronic', '7G Dct', '6 Speed Cvt', '8 Speed Cvt',
        '7 Speed Dct Steptronic', 'Six Speed Manual With Paddle Shifter',
        '7 Speed Pdk', '2 Speed', '8 Speed Tiptronic S', '8 Speedpdk',
        '8 Speed Porsche Doppelkupplung',
        '8 Speed Tiptronic S With Shift By Wire', '7 Speed Dsg',
        '6 Speed Dct', '6 Speed With Sequential Shift'], dtype=object)
In [433]:
main.columns[31]
Out[433]:
'Report Incorrect Specs'
In [434]:
main.drop(["Report Incorrect Specs"],axis=1,inplace=True)
In [435]:
main.columns[32]
Out[435]:
'Petrol Fuel Tank Capacity (Litres)'
In [436]:
main["Petrol Fuel Tank Capacity (Litres)"]
Out[436]:
0         37.0
1         40.0
2         48.0
3         37.0
4         27.0
          ... 
160026     NaN
160027     NaN
160028    60.0
160029     NaN
160030     NaN
Name: Petrol Fuel Tank Capacity (Litres), Length: 160031, dtype: float64
In [437]:
main.columns[33]
Out[437]:
'Emission Norm Compliance'
In [438]:
main["Emission Norm Compliance"]
Out[438]:
0         BS VI
1         BS VI
2         BS VI
3         BS VI
4         BS VI
          ...  
160026      ZEV
160027      NaN
160028    BS VI
160029      NaN
160030      NaN
Name: Emission Norm Compliance, Length: 160031, dtype: object
In [439]:
main["Emission Norm Compliance"].unique()
Out[439]:
array(['BS VI', nan, 'BS IV', 'Bharat Stage III', 'BSIV',
        'Bharat Stage IV', 'ZEV', 'Euro VI', 'BS III', 'Euro IV', 'SOHC'],
      dtype=object)
In [440]:
def preprocess_emission_norm_compliance(text):
    text=str(text)
    if text=="nan":
        return np.nan
    if "Bharat Stage" in text:
        text=text.replace("Bharat Stage","BS")
    if "IV" in text:
        if text[text.find("IV")-1]!=" ":
            text=text.replace("IV"," IV")
    return text
In [441]:
main["Emission Norm Compliance"]=main["Emission Norm Compliance"].apply(preprocess_emission_norm_compliance)
In [442]:
main.columns[34]
Out[442]:
'Front Suspension'
In [443]:
main["Front Suspension"]
Out[443]:
0                         Mac Pherson Strut
1                          Macpherson Strut
2          McPherson Strut with coil spring
3                         Mac Pherson Strut
4         MacPherson Strut with Coil Spring
                        ...                
160026             sophisticated Suspension
160027                                  NaN
160028                                  NaN
160029                                  NaN
160030                                  NaN
Name: Front Suspension, Length: 160031, dtype: object
In [444]:
main["Front Suspension"].unique()
Out[444]:
array(['Mac Pherson Strut', 'Macpherson Strut',
        'McPherson Strut with coil spring',
        'MacPherson Strut with Coil Spring',
        'Mac Pherson Strut with Coil Spring', 'McPherson',
        'McPherson Strut', 'Mac Pherson Strut & Coil Spring',
        'MacPherson Strut', 'MacPherson  Strut with Coil Spring',
        'Mac Pherson Strut & Coil', nan,
        'Independent MacPherson Strut with coil spring and anti-roll bar',
        'Independent McPherson Strut with Dual Path Mounts',
        'Independent MacPherson struts', 'Independent MacPherson Strut',
        'Independent Mcpherson',
        'Independent Coil Spring With Anti-Roll Bar',
        'Independent McPherson Strut with Coil Spring',
        'Independent McPherson Strut with Coil spring & Anti-roll bar',
        'Independent McPherson strut with coil spring',
        'Double Ball Joint MacPherson Strut with Stabilizer Bar',
        'Independent McPherson struts with offset coil spring',
        'Independent McPherson',
        'Independent McPherson struts with offset coil spring & stabiliser bar',
        'Independent Double Wishbone With Torsion Bar Spring & Stabilizer Bar',
        'Independent Coil Spring with Anti-roll Bar',
        'Independent McPherson Strut With Coil Spring & Anti-Roll Bar',
        'Double Wishbone',
        'Independent double wishbone with torsion bar spring & stabilizer bar',
        'Independent McPherson Strut with Coil Spring & Anti-Roll Bar',
        'Air Suspension', 'underbody guard with heavy-duty',
        'Five-link front suspension; tubular anti-roll bar', 'RS Sports',
        'Five-link front axle; tubular anti-roll bar; air spring suspension',
        'Sport Adaptive Air Suspension', 'Adaptive Air Suspension',
        'RS Sports Suspension plus with DRC', 'S Sports suspension',
        'Four link Double Wishbones', 'Adaptive 2-axle Air Suspension',
        'Adaptive M-specific Suspension', 'Adaptive M suspension',
        'Double Joint Spring Strut',
        'Single-joint spring strut axle in lightweight aluminium-steel construction',
        'M Sport Suspension', 'independent damping', 'AIRMATIC suspension',
        'Adaptive air suspension',
        'Adaptive Suspension with variable shock absorber',
        'Adaptive Suspension', 'Adaptive M-Specific Suspension',
        'Adaptive M Suspension', 'Air-Suspension',
        'Independent Double wishbones',
        'Independent double wishbone with coil spring',
        'independent,double wishbones(Adaptive Dampers)',
        'Magnetorheological damper', 'adaptive magnetic suspension',
        'MacPherson Strut,Coil Spring', 'McPherson Strut with Coil Spring',
        'MacPherson Strut, Coil Spring', 'McPherson Strut, Coil Spring',
        'Mcpherson Strut Coil Spring', 'McPherson strut with coil spring',
        'McPherson strut', 'Mcpherson Strut',
        'Independent Double wishbone coil springs gas shock absorbers stabiliser bar',
        'Independent Double Wishbone,Coil Spring',
        'Double Wishbone, Coil Spring',
        'Independent Double Wishbone, Coil Spring',
        'McPherson Strut with Lower Control Arm',
        'Mcpherson Strut with Frequency Selective Damping, HRS with Anti Roll Bar',
        'Independent double wishbone',
        'Mcpherson Strut with Lower Control Arm',
        'Mcpherson strut with coil spring',
        'Macpherson Strut with coil spring', 'McPherson suspension',
        'The Magneto Rheological Suspension',
        'Aluminum double-wishbone suspension',
        'Push rod magneto-rheologic active with horizontal dampers',
        'adaptive air suspension', 'Electronic Air Suspension',
        'Double wishbones Coil Suspension', 'Multi-link type,coil springs',
        'AIR Adaptive Variable Suspension', 'MacPherson Struts',
        'MacPherson struts', 'Double Wishbone with Stabilizer',
        'MacPherson Strut with anti-roll bar',
        'Double Wishbone Suspension with Coil over Shocks with FDD & MTV-CL',
        'Double Wishbone with Coil Spring', 'IFS Coil Spring',
        'McPherson Strut Independent Suspension with FSD and Stabilizer bar',
        'Independent McPherson Strut with Dual Path Mounts, Coil Spring',
        'Independent Double Wishbone Front Suspension with Coil Over Damper & Stabiliser Bar',
        'MacPherson Strut with anti-roll bar`',
        'Rigid axle with leaf spring',
        'MacPherson Type with Wishbone Link',
        'Double Wish-bone Type, Independent Front Coil Spring',
        'independent double wishbone design coil springs, anti-roll bar and adaptive damping',
        'Independent double wishbone, coil springs, anti-roll bar and adaptive dampers',
        'Air Adaptive Suspensions',
        'Quattroporte Sport GT S is fitted with the single-setting racing-style suspension system',
        'AIRMATIC', 'AMG Suspension', 'AMG RIDE CONTROL sports suspension',
        'Adaptive Damping System', 'AMG RIDE CONTROL+',
        'four-link axle suspension',
        'SUSPENSION WITH  ADAPTIVE DAMPING SYSTEM', 'ADAPTIVE DAMPING',
        'AMG RIDE CONTROL suspension',
        'active roll stabilization intelligent suspension',
        'Agility control', 'AMG RIDE CONTROL',
        'DYNAMIC BODY CONTROL suspension', 'Independent Suspension',
        'Rigid Leaf Spring', 'adaptive damping Suspension',
        'Macpherson Strut with Stabilizer bar',
        'Mcpherson Strut with Stablizer bar',
        'Dual Helix Independent Suspension',
        'single joint spring-strut front axle', 'Sport Suspension',
        'MacPherson Struct', 'Single-Link Spring-Strut',
        'McPherson Strut with coil spring & stabilizer bar',
        'MacPherson Coil Springs with Stablizer Bar',
        'Double wishbone torsion bar with stabiliser bar',
        'Independent, Double wishbone coil springs with stabilizer bar',
        'McPherson Strut with coil spring & Stabilizer bar',
        'McPherson Strut With Bilstein Shock Absorbers,Eibach Springs & Stabilizer Bar',
        'Independent Double Wishbone Coil Spring With Stabilizer Bar',
        'McPherson Strut & Coil Spring',
        'MacPherson strut with coil springs and stabilizer bar',
        'Double Wishbone With Coil Spring Suspension',
        'Mac Pherson strut with Lower Transverse link',
        'Aluminum double-wishbone, independent wheel suspension',
        'McPherson spring-strut', 'Double wishbone', 'Active Suspension',
        'spring-strut suspension', 'Aluminium double-wishbone front axle',
        'Lightweight spring-strut suspension',
        'Adaptive air suspension including Porsche Active Suspension',
        'Macpherson strut with lower triangle & coil spring',
        'Mac Pherson strut with lower Transverse link',
        'Double wishbone front axle',
        'McPherson suspension with lower triangular links and stabiliser bar',
        'McPherson suspension with lower triangular links and torsion stabiliser',
        'MacPherson suspension',
        'Independent Lower Wishbone McPherson Strut with Coil Spring & Anti Roll Bar',
        'Independent Lower Wishbone McPherson Strut with Coil Spring',
        'Independent Lower Wishbone McPherson Dual Path Strut',
        'Independent, Lower Wishbone, Mcpherson Strut With Coil Spring',
        'Independent MacPherson dual path strut with coil spring',
        'Independent, Lower wishbone, McPherson Strut with coil spring',
        'Independent MacPherson strut with coil spring',
        'Independent MacPherson Dual Path Strut with Coil Spring',
        'Semi-elliptical leaf springs-6leaves',
        'Independent, Lower Wishbone, McPherson Strut with Coil Spring',
        'Double Wishbone With Torsion Bar',
        'McPherson suspension and stabiliser bar',
        'McPherson strut with stabilizer bar',
        'Mc-Pherson suspension and stabiliser bar',
        'Independent suspension with coil spring', 'Air',
        'sophisticated Suspension'], dtype=object)
In [20]:
def preprocess_Front_Suspension(text):
    text=str(text)
    if text=="nan":
        return np.nan
    #Two exceptional case occcured in Volkswagen
    if "Mc-Pherson suspension and stabiliser bar" in text or "McPherson suspension and stabiliser bar" in text:
        return "Mac Pherson Strut With Stabilizer Bar Front Suspension"
    text=text.title()
    if text.find("c")!=-1 and (text.find("P")!=-1 or text.find("p")!=-1):
        try:
            checking=text.find("a")
            if checking==-1:
                raise Exception("a Not Present")
            if checking>21:
                raise Exception("a is Present in outofbound")
            if "Pherson" in text or "pherson" in text:
                text=text[:text.find("c")+1]+" "+text[text.find("c")+1:]
        except:
            if "Pherson" in text or "pherson" in text: 
                text=text[:text.find("c")]+"a"+text[text.find("c"):]
                text=text[:text.find("c")+1]+" "+text[text.find("c")+1:]
    else:
        if "Independent" in text and "Coil" in text and "Bar" in text and "Double" not in text:
            text=text[:12]+" Mac Pherson "+text[12:]
    text=text.title()
    if "Pherson" in text:
        if "Strut" not in text and "Suspension" in text:
            text=text.replace("Suspension","")
    if "," in text:
        text=text.replace(","," ")
    if "  " in text:
        text=text.replace("  "," ")
    if text[0]==" ":
        text=text[1:]
    text=text.title()
    if ("Spring" not in text and "Springs" not in text) and ("Pherson" in text):
        if "Coil" in text:
            text=text+" Spring"
            try:
                text=text.replace("With","&")
                if "with" in text:
                    raise Exception("Own Exception")
            except:
                text=text.replace("with","&")
        else:
            if "Struct" in text:
                text=text.replace("Struct","Strut")
            if "Strut" not in text and "Link" not in text and "Stabiliser" not in text:
                text=text+" Strut"
            if "Struts" in text:
                text=text.replace("Struts","Strut")
    else:
        if "Springs" in text:
            text=text.replace("Springs","Spring")
        if "Struts" in text:
            text=text.replace("Struts","Strut")
        if "Strut" not in text and "Double" not in text and "Link" not in text and "Ifs" not in text and "Suspension" not in text:
            coil=text.find("Coil")
            if coil!=-1:
                text=text[:text.find("Coil")]+"Strut "+text[text.find("Coil"):]
        if "And" in text:
                text=text.replace("And","&")
        try:
            text=text.replace("With","&")
            if "with" in text:
                raise Exception("Own Exception")
        except:
            text=text.replace("with","&")
    if "  " in text:
        text=text.replace("  "," ")
    if "Coil" in text and "Strut" in text:
        if text[text.find("Coil")-2]!="&":
            text=text[:text.find("Coil")]+"& "+text[text.find("Coil"):]
    text=text.title()
    if "Front" not in text:
        if "Suspension" not in text:
            if "System" not in text:
                text=text+" Front Suspension"
                if "Adaptive" in text:
                    text=text+" System"
            else:
                find_=text.find("System")
                text=text[:find_]+"Front Suspension "+text[find_:]
        else:
            find_=text.find("Suspension")
            text=text[:find_]+"Front "+text[find_:]
            if "System" not in text:
                if "Adaptive" in text and "Damping" in text:
                    text=text+" System"
    else:
        text=text+" Suspension"
    find_=text.find("Suspension")
    find1_=text.rfind("Suspension")
    if find_!=find1_:
        text=text[:find1_-1]
    if ";" in text:
        text=text.replace(";"," With")
    if "`" in text:
        text=text.replace("`","")
    if "Stablizer" in text:
        text=text.replace("Stablizer","Stabilizer")
    return text
In [446]:
#Maruti
In [447]:
main.loc[main["Model"]=="Maruti"][["Front Suspension"]]["Front Suspension"].unique()
Out[447]:
array(['Mac Pherson Strut', 'Macpherson Strut',
        'McPherson Strut with coil spring',
        'MacPherson Strut with Coil Spring',
        'Mac Pherson Strut with Coil Spring', 'McPherson',
        'McPherson Strut', 'Mac Pherson Strut & Coil Spring',
        'MacPherson Strut', 'MacPherson  Strut with Coil Spring',
        'Mac Pherson Strut & Coil', nan], dtype=object)
In [448]:
com=main.loc[main["Model"]=="Maruti"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [449]:
com.unique()
Out[449]:
array(['Mac Pherson Strut Front Suspension',
        'Mac Pherson Strut & Coil Spring Front Suspension', nan],
      dtype=object)
In [450]:
#Ford
In [451]:
main.loc[main["Model"]=="Ford"][["Front Suspension"]]["Front Suspension"].unique()
Out[451]:
array(['Independent MacPherson Strut with coil spring and anti-roll bar',
        'Independent McPherson Strut with Dual Path Mounts',
        'McPherson Strut', 'Independent MacPherson struts',
        'Independent MacPherson Strut', 'Independent Mcpherson',
        'Independent Coil Spring With Anti-Roll Bar',
        'Independent McPherson Strut with Coil Spring',
        'Independent McPherson Strut with Coil spring & Anti-roll bar',
        'Independent McPherson strut with coil spring', nan,
        'Double Ball Joint MacPherson Strut with Stabilizer Bar',
        'Independent McPherson struts with offset coil spring',
        'Independent McPherson',
        'Independent McPherson struts with offset coil spring & stabiliser bar',
        'Independent Double Wishbone With Torsion Bar Spring & Stabilizer Bar',
        'MacPherson Strut', 'Independent Coil Spring with Anti-roll Bar',
        'Independent McPherson Strut With Coil Spring & Anti-Roll Bar',
        'Double Wishbone',
        'Independent double wishbone with torsion bar spring & stabilizer bar',
        'Independent McPherson Strut with Coil Spring & Anti-Roll Bar'],
      dtype=object)
In [452]:
com=main.loc[main["Model"]=="Ford"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [453]:
com.unique()
Out[453]:
array(['Independent Mac Pherson Strut & Coil Spring & Anti-Roll Bar Front Suspension',
        'Independent Mac Pherson Strut With Dual Path Mounts Front Suspension',
        'Mac Pherson Strut Front Suspension',
        'Independent Mac Pherson Strut Front Suspension',
        'Independent Mac Pherson Strut & Coil Spring Front Suspension',
        nan,
        'Double Ball Joint Mac Pherson Strut With Stabilizer Bar Front Suspension',
        'Independent Mac Pherson Strut & Offset & Coil Spring Front Suspension',
        'Independent Mac Pherson Strut & Offset & Coil Spring & Stabiliser Bar Front Suspension',
        'Independent Double Wishbone & Torsion Bar Spring & Stabilizer Bar Front Suspension',
        'Double Wishbone Front Suspension'], dtype=object)
In [454]:
#Audi
In [455]:
main.loc[main["Model"]=="Audi"][["Front Suspension"]]["Front Suspension"].unique()
Out[455]:
array(['Air Suspension', 'underbody guard with heavy-duty', nan,
        'Five-link front suspension; tubular anti-roll bar', 'RS Sports',
        'Five-link front axle; tubular anti-roll bar; air spring suspension',
        'Sport Adaptive Air Suspension', 'Adaptive Air Suspension',
        'RS Sports Suspension plus with DRC', 'S Sports suspension'],
      dtype=object)
In [456]:
com=main.loc[main["Model"]=="Audi"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [457]:
com.unique()
Out[457]:
array(['Air Front Suspension',
        'Underbody Guard & Heavy-Duty Front Suspension', nan,
        'Five-Link Front Suspension With Tubular Anti-Roll Bar',
        'Rs Sports Front Suspension',
        'Five-Link Front Axle With Tubular Anti-Roll Bar With Air Spring Suspension',
        'Sport Adaptive Air Front Suspension',
        'Adaptive Air Front Suspension',
        'Rs Sports Front Suspension Plus & Drc',
        'S Sports Front Suspension'], dtype=object)
In [458]:
#Bentley
In [459]:
main.loc[main["Model"]=="Bentley"][["Front Suspension"]]["Front Suspension"].unique()
Out[459]:
array(['Air Suspension', 'Four link Double Wishbones'], dtype=object)
In [460]:
com=main.loc[main["Model"]=="Bentley"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [461]:
com.unique()
Out[461]:
array(['Air Front Suspension',
        'Four Link Double Wishbones Front Suspension'], dtype=object)
In [462]:
#Force
In [463]:
main.loc[main["Model"]=="Force"][["Front Suspension"]]["Front Suspension"].unique()
Out[463]:
array(['Independent double wishbone with coil spring'], dtype=object)
In [464]:
com=main.loc[main["Model"]=="Force"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [465]:
com.unique()
Out[465]:
array(['Independent Double Wishbone & Coil Spring Front Suspension'],
      dtype=object)
In [466]:
#Ferrari
In [467]:
main.loc[main["Model"]=="Ferrari"][["Front Suspension"]]["Front Suspension"].unique()
Out[467]:
array([nan, 'independent,double wishbones(Adaptive Dampers)',
        'Magnetorheological damper', 'adaptive magnetic suspension'],
      dtype=object)
In [468]:
com=main.loc[main["Model"]=="Ferrari"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [469]:
com.unique()
Out[469]:
array([nan,
        'Independent Double Wishbones(Adaptive Dampers) Front Suspension System',
        'Magnetorheological Damper Front Suspension',
        'Adaptive Magnetic Front Suspension'], dtype=object)
In [470]:
#Honda
In [471]:
main.loc[main["Model"]=="Honda"][["Front Suspension"]]["Front Suspension"].unique()
Out[471]:
array(['MacPherson Strut,Coil Spring', 'McPherson Strut with Coil Spring',
        'MacPherson Strut, Coil Spring', 'McPherson Strut, Coil Spring',
        'McPherson Strut with coil spring', 'Mcpherson Strut Coil Spring'],
      dtype=object)
In [472]:
com=main.loc[main["Model"]=="Honda"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [473]:
com.unique()
Out[473]:
array(['Mac Pherson Strut & Coil Spring Front Suspension'], dtype=object)
In [474]:
#Hyundai
In [475]:
main.loc[main["Model"]=="Hyundai"][["Front Suspension"]]["Front Suspension"].unique()
Out[475]:
array(['McPherson strut with coil spring', 'McPherson strut',
        'Mcpherson Strut', 'McPherson Strut'], dtype=object)
In [476]:
com=main.loc[main["Model"]=="Hyundai"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [477]:
com.unique()
Out[477]:
array(['Mac Pherson Strut & Coil Spring Front Suspension',
        'Mac Pherson Strut Front Suspension'], dtype=object)
In [478]:
#Isuzu
In [479]:
main.loc[main["Model"]=="Isuzu"][["Front Suspension"]]["Front Suspension"].unique()
Out[479]:
array(['Independent Double wishbone coil springs gas shock absorbers stabiliser bar',
        'Independent Double Wishbone,Coil Spring',
        'Double Wishbone, Coil Spring',
        'Independent Double Wishbone, Coil Spring'], dtype=object)
In [480]:
com=main.loc[main["Model"]=="Isuzu"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [481]:
com.unique()
Out[481]:
array(['Independent Double Wishbone Coil Spring Gas Shock Absorbers Stabiliser Bar Front Suspension',
        'Independent Double Wishbone Coil Spring Front Suspension',
        'Double Wishbone Coil Spring Front Suspension'], dtype=object)
In [482]:
#Jeep
In [483]:
main.loc[main["Model"]=="Jeep"][["Front Suspension"]]["Front Suspension"].unique()
Out[483]:
array(['McPherson Strut with Lower Control Arm',
        'Mcpherson Strut with Frequency Selective Damping, HRS with Anti Roll Bar',
        'Independent double wishbone', nan,
        'Mcpherson Strut with Lower Control Arm'], dtype=object)
In [484]:
com=main.loc[main["Model"]=="Jeep"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [485]:
com.unique()
Out[485]:
array(['Mac Pherson Strut With Lower Control Arm Front Suspension',
        'Mac Pherson Strut With Frequency Selective Damping Hrs With Anti Roll Bar Front Suspension',
        'Independent Double Wishbone Front Suspension', nan], dtype=object)
In [486]:
#Kia
In [487]:
main.loc[main["Model"]=="Kia"][["Front Suspension"]]["Front Suspension"].unique()
Out[487]:
array(['Mcpherson strut with coil spring',
        'McPherson Strut with Coil Spring',
        'McPherson Strut with coil spring', 'McPherson Strut',
        'Macpherson Strut with coil spring', 'McPherson suspension'],
      dtype=object)
In [488]:
com=main.loc[main["Model"]=="Kia"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [489]:
com.unique()
Out[489]:
array(['Mac Pherson Strut & Coil Spring Front Suspension',
        'Mac Pherson Strut Front Suspension'], dtype=object)
In [490]:
#Lamborghini
In [491]:
main.loc[main["Model"]=="Lamborghini"][["Front Suspension"]]["Front Suspension"].unique()
Out[491]:
array(['The Magneto Rheological Suspension',
        'Aluminum double-wishbone suspension',
        'Push rod magneto-rheologic active with horizontal dampers',
        'adaptive air suspension'], dtype=object)
In [492]:
com=main.loc[main["Model"]=="Lamborghini"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [493]:
com.unique()
Out[493]:
array(['The Magneto Rheological Front Suspension',
        'Aluminum Double-Wishbone Front Suspension',
        'Push Rod Magneto-Rheologic Active & Horizontal Dampers Front Suspension',
        'Adaptive Air Front Suspension'], dtype=object)
In [494]:
#Land_Rover
In [495]:
main.loc[main["Model"]=="Land_Rover"][["Front Suspension"]]["Front Suspension"].unique()
Out[495]:
array(['Electronic Air Suspension', 'Double wishbones Coil Suspension',
        nan, 'MacPherson Strut'], dtype=object)
In [496]:
com=main.loc[main["Model"]=="Land_Rover"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [497]:
com.unique()
Out[497]:
array(['Electronic Air Front Suspension',
        'Double Wishbones Coil Front Suspension', nan,
        'Mac Pherson Strut Front Suspension'], dtype=object)
In [498]:
#Lexus
In [499]:
main.loc[main["Model"]=="Lexus"][["Front Suspension"]]["Front Suspension"].unique()
Out[499]:
array(['MacPherson Strut', 'Multi-link type,coil springs',
        'AIR Adaptive Variable Suspension', 'MacPherson Struts',
        'MacPherson struts', 'Double Wishbone with Stabilizer'],
      dtype=object)
In [500]:
com=main.loc[main["Model"]=="Lexus"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [501]:
com.unique()
Out[501]:
array(['Mac Pherson Strut Front Suspension',
        'Multi-Link Type Coil Spring Front Suspension',
        'Air Adaptive Variable Front Suspension',
        'Double Wishbone & Stabilizer Front Suspension'], dtype=object)
In [502]:
#Mahindra
In [503]:
main.loc[main["Model"]=="Mahindra"][["Front Suspension"]]["Front Suspension"].unique()
Out[503]:
array(['MacPherson Strut with anti-roll bar',
        'Double Wishbone Suspension with Coil over Shocks with FDD & MTV-CL',
        'Double Wishbone with Coil Spring', 'IFS Coil Spring',
        'McPherson Strut Independent Suspension with FSD and Stabilizer bar',
        'Independent McPherson Strut with Dual Path Mounts, Coil Spring',
        'Independent Double Wishbone Front Suspension with Coil Over Damper & Stabiliser Bar',
        nan, 'MacPherson Strut with anti-roll bar`',
        'Rigid axle with leaf spring',
        'MacPherson Type with Wishbone Link',
        'Double Wish-bone Type, Independent Front Coil Spring',
        'Double Wishbone'], dtype=object)
In [504]:
com=main.loc[main["Model"]=="Mahindra"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [505]:
com.unique()
Out[505]:
array(['Mac Pherson Strut With Anti-Roll Bar Front Suspension',
        'Double Wishbone Front Suspension & Coil Over Shocks & Fdd & Mtv-Cl',
        'Double Wishbone & Coil Spring Front Suspension',
        'Ifs Coil Spring Front Suspension',
        'Mac Pherson Strut Independent Front Suspension With Fsd And Stabilizer Bar',
        'Independent Mac Pherson Strut & Dual Path Mounts & Coil Spring Front Suspension',
        'Independent Double Wishbone Front Suspension & Coil Over Damper & Stabiliser Bar',
        nan, 'Rigid Axle & Leaf Spring Front Suspension',
        'Mac Pherson Type With Wishbone Link Front Suspension',
        'Double Wish-Bone Type Independent Front Coil Spring Suspension',
        'Double Wishbone Front Suspension'], dtype=object)
In [506]:
#Aston_Martin
In [507]:
main.loc[main["Model"]=="Aston_Martin"][["Front Suspension"]]["Front Suspension"].unique()
Out[507]:
array(['independent double wishbone design coil springs, anti-roll bar and adaptive damping',
        'Independent double wishbone, coil springs, anti-roll bar and adaptive dampers',
        'Independent double wishbone'], dtype=object)
In [508]:
com=main.loc[main["Model"]=="Aston_Martin"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [509]:
com.unique()
Out[509]:
array(['Independent Double Wishbone Design Coil Spring Anti-Roll Bar & Adaptive Damping Front Suspension System',
        'Independent Double Wishbone Coil Spring Anti-Roll Bar & Adaptive Dampers Front Suspension System',
        'Independent Double Wishbone Front Suspension'], dtype=object)
In [510]:
#Maserati
In [511]:
main.loc[main["Model"]=="Maserati"][["Front Suspension"]]["Front Suspension"].unique()
Out[511]:
array(['Air Adaptive Suspensions', 'Double Wishbone', nan,
        'Quattroporte Sport GT S is fitted with the single-setting racing-style suspension system'],
      dtype=object)
In [512]:
com=main.loc[main["Model"]=="Maserati"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [513]:
com.unique()
Out[513]:
array(['Air Adaptive Front Suspensions',
        'Double Wishbone Front Suspension', nan,
        'Quattroporte Sport Gt S Is Fitted & The Single-Setting Racing-Style Front Suspension System'],
      dtype=object)
In [514]:
#Mercedes-Benz
In [515]:
main.loc[main["Model"]=="Mercedes-Benz"][["Front Suspension"]]["Front Suspension"].unique()
Out[515]:
array(['AIRMATIC', 'Air Suspension', 'AMG Suspension', nan,
        'AMG RIDE CONTROL sports suspension', 'Adaptive Damping System',
        'AMG RIDE CONTROL+', 'four-link axle suspension',
        'SUSPENSION WITH  ADAPTIVE DAMPING SYSTEM', 'ADAPTIVE DAMPING',
        'AMG RIDE CONTROL suspension', 'AIRMATIC suspension',
        'active roll stabilization intelligent suspension',
        'Agility control', 'Adaptive Air Suspension', 'AMG RIDE CONTROL',
        'DYNAMIC BODY CONTROL suspension', 'Independent Suspension',
        'Rigid Leaf Spring', 'adaptive damping Suspension'], dtype=object)
In [516]:
com=main.loc[main["Model"]=="Mercedes-Benz"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [517]:
com.unique()
Out[517]:
array(['Airmatic Front Suspension', 'Air Front Suspension',
        'Amg Front Suspension', nan,
        'Amg Ride Control Sports Front Suspension',
        'Adaptive Damping Front Suspension System',
        'Amg Ride Control+ Front Suspension',
        'Four-Link Axle Front Suspension',
        'Front Suspension & Adaptive Damping System',
        'Amg Ride Control Front Suspension',
        'Active Roll Stabilization Intelligent Front Suspension',
        'Agility Control Front Suspension',
        'Adaptive Air Front Suspension',
        'Dynamic Body Control Front Suspension',
        'Independent Front Suspension',
        'Rigid Leaf Spring Front Suspension'], dtype=object)
In [518]:
#MG
In [21]:
main.loc[main["Model"]=="MG"][["Front Suspension"]]["Front Suspension"].unique()
Out[21]:
array(['Macpherson Strut with Stabilizer bar',
        'Mcpherson Strut with Stablizer bar',
        'Dual Helix Independent Suspension', 'MacPherson Strut'],
      dtype=object)
In [22]:
com=main.loc[main["Model"]=="MG"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [23]:
com.unique()
Out[23]:
array(['Mac Pherson Strut With Stabilizer Bar Front Suspension',
        'Dual Helix Independent Front Suspension',
        'Mac Pherson Strut Front Suspension'], dtype=object)
In [24]:
#Mini
In [25]:
main.loc[main["Model"]=="Mini"][["Front Suspension"]]["Front Suspension"].unique()
Out[25]:
array(['single joint spring-strut front axle', 'Sport Suspension',
        'MacPherson Struct', nan, 'Single-Link Spring-Strut'], dtype=object)
In [26]:
com=main.loc[main["Model"]=="Mini"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [27]:
com.unique()
Out[27]:
array(['Single Joint Spring-Strut Front Axle Suspension',
        'Sport Front Suspension', 'Mac Pherson Strut Front Suspension',
        nan, 'Single-Link Spring-Strut Front Suspension'], dtype=object)
In [28]:
#Mitsubishi
In [29]:
main.loc[main["Model"]=="Mitsubishi"][["Front Suspension"]]["Front Suspension"].unique()
Out[29]:
array(['McPherson Strut with coil spring & stabilizer bar',
        'MacPherson Coil Springs with Stablizer Bar',
        'Double wishbone torsion bar with stabiliser bar',
        'Independent, Double wishbone coil springs with stabilizer bar',
        'McPherson Strut with coil spring & Stabilizer bar', nan,
        'McPherson Strut With Bilstein Shock Absorbers,Eibach Springs & Stabilizer Bar',
        'Independent Double Wishbone Coil Spring With Stabilizer Bar',
        'McPherson Strut & Coil Spring', 'Double Wishbone',
        'MacPherson strut with coil springs and stabilizer bar',
        'Double Wishbone With Coil Spring Suspension'], dtype=object)
In [30]:
com=main.loc[main["Model"]=="Mitsubishi"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [31]:
com.unique()
Out[31]:
array(['Mac Pherson Strut & Coil Spring & Stabilizer Bar Front Suspension',
        'Double Wishbone Torsion Bar & Stabiliser Bar Front Suspension',
        'Independent Double Wishbone Coil Spring & Stabilizer Bar Front Suspension',
        nan,
        'Mac Pherson Strut & Bilstein Shock Absorbers Eibach Spring & Stabilizer Bar Front Suspension',
        'Mac Pherson Strut & Coil Spring Front Suspension',
        'Double Wishbone Front Suspension',
        'Double Wishbone & Coil Spring Front Suspension'], dtype=object)
In [32]:
#Nissan
In [33]:
main.loc[main["Model"]=="Nissan"][["Front Suspension"]]["Front Suspension"].unique()
Out[33]:
array(['Mac Pherson strut with Lower Transverse link',
        'McPherson Strut with Coil Spring', 'Double Wishbone'],
      dtype=object)
In [34]:
com=main.loc[main["Model"]=="Nissan"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [35]:
com.unique()
Out[35]:
array(['Mac Pherson Strut With Lower Transverse Link Front Suspension',
        'Mac Pherson Strut & Coil Spring Front Suspension',
        'Double Wishbone Front Suspension'], dtype=object)
In [36]:
#Porsche
In [37]:
main.loc[main["Model"]=="Porsche"][["Front Suspension"]]["Front Suspension"].unique()
Out[37]:
array(['Aluminum double-wishbone, independent wheel suspension',
        'McPherson spring-strut', 'Double wishbone',
        'Adaptive air suspension', 'Active Suspension',
        'spring-strut suspension', 'Aluminium double-wishbone front axle',
        'Double Wishbone', 'Lightweight spring-strut suspension', nan,
        'Adaptive air suspension including Porsche Active Suspension'],
      dtype=object)
In [38]:
com=main.loc[main["Model"]=="Porsche"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [39]:
com.unique()
Out[39]:
array(['Aluminum Double-Wishbone Independent Wheel Front Suspension',
        'Mac Pherson Spring-Strut Front Suspension',
        'Double Wishbone Front Suspension',
        'Adaptive Air Front Suspension', 'Active Front Suspension',
        'Spring-Strut Front Suspension',
        'Aluminium Double-Wishbone Front Axle Suspension',
        'Lightweight Spring-Strut Front Suspension', nan,
        'Adaptive Air Front Suspension Including Porsche Active'],
      dtype=object)
In [40]:
#Renault
In [41]:
main.loc[main["Model"]=="Renault"][["Front Suspension"]]["Front Suspension"].unique()
Out[41]:
array(['Mac Pherson strut with Lower Transverse link',
        'Macpherson strut with lower triangle & coil spring',
        'Mac Pherson strut with lower Transverse link'], dtype=object)
In [42]:
com=main.loc[main["Model"]=="Renault"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [43]:
com.unique()
Out[43]:
array(['Mac Pherson Strut With Lower Transverse Link Front Suspension',
        'Mac Pherson Strut & Lower Triangle & Coil Spring Front Suspension'],
      dtype=object)
In [44]:
#Rolls-Royce
In [45]:
main.loc[main["Model"]=="Rolls-Royce"][["Front Suspension"]]["Front Suspension"].unique()
Out[45]:
array(['Double Wishbone', nan, 'Double wishbone front axle'], dtype=object)
In [46]:
com=main.loc[main["Model"]=="Rolls-Royce"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [47]:
com.unique()
Out[47]:
array(['Double Wishbone Front Suspension', nan,
        'Double Wishbone Front Axle Suspension'], dtype=object)
In [48]:
#Skoda
In [49]:
main.loc[main["Model"]=="Skoda"][["Front Suspension"]]["Front Suspension"].unique()
Out[49]:
array(['McPherson suspension with lower triangular links and stabiliser bar',
        'McPherson suspension with lower triangular links and torsion stabiliser',
        'MacPherson suspension'], dtype=object)
In [50]:
com=main.loc[main["Model"]=="Skoda"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [51]:
com.unique()
Out[51]:
array(['Mac Pherson With Lower Triangular Links And Stabiliser Bar Front Suspension',
        'Mac Pherson With Lower Triangular Links And Torsion Stabiliser Front Suspension',
        'Mac Pherson Strut Front Suspension'], dtype=object)
In [52]:
#Tata
In [53]:
main.loc[main["Model"]=="Tata"][["Front Suspension"]]["Front Suspension"].unique()
Out[53]:
array(['Independent Lower Wishbone McPherson Strut with Coil Spring & Anti Roll Bar',
        'Independent Lower Wishbone McPherson Strut with Coil Spring',
        'Independent Lower Wishbone McPherson Dual Path Strut',
        'Independent, Lower Wishbone, Mcpherson Strut With Coil Spring',
        'Independent MacPherson dual path strut with coil spring',
        'Independent, Lower wishbone, McPherson Strut with coil spring',
        'Independent MacPherson strut with coil spring',
        'Independent MacPherson Dual Path Strut with Coil Spring',
        'Semi-elliptical leaf springs-6leaves', nan,
        'Independent, Lower Wishbone, McPherson Strut with Coil Spring'],
      dtype=object)
In [54]:
com=main.loc[main["Model"]=="Tata"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [55]:
com.unique()
Out[55]:
array(['Independent Lower Wishbone Mac Pherson Strut & Coil Spring & Anti Roll Bar Front Suspension',
        'Independent Lower Wishbone Mac Pherson Strut & Coil Spring Front Suspension',
        'Independent Lower Wishbone Mac Pherson Dual Path Strut Front Suspension',
        'Independent Mac Pherson Dual Path Strut & Coil Spring Front Suspension',
        'Independent Mac Pherson Strut & Coil Spring Front Suspension',
        'Semi-Elliptical Leaf Spring-6Leaves Front Suspension', nan],
      dtype=object)
In [56]:
#Toyota
In [57]:
main.loc[main["Model"]=="Toyota"][["Front Suspension"]]["Front Suspension"].unique()
Out[57]:
array(['Double Wishbone', 'Double wishbone',
        'Double Wishbone With Torsion Bar', 'MacPherson Strut', nan],
      dtype=object)
In [58]:
com=main.loc[main["Model"]=="Toyota"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [59]:
com.unique()
Out[59]:
array(['Double Wishbone Front Suspension',
        'Double Wishbone & Torsion Bar Front Suspension',
        'Mac Pherson Strut Front Suspension', nan], dtype=object)
In [60]:
#Volkswagen
In [61]:
main.loc[main["Model"]=="Volkswagen"][["Front Suspension"]]["Front Suspension"].unique()
Out[61]:
array(['McPherson suspension and stabiliser bar',
        'McPherson strut with stabilizer bar',
        'Mc-Pherson suspension and stabiliser bar',
        'Independent suspension with coil spring'], dtype=object)
In [62]:
com=main.loc[main["Model"]=="Volkswagen"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [63]:
com.unique()
Out[63]:
array(['Mac Pherson Strut With Stabilizer Bar Front Suspension',
        'Independent Front Suspension & Coil Spring'], dtype=object)
In [64]:
#Volvo
In [65]:
main.loc[main["Model"]=="Volvo"][["Front Suspension"]]["Front Suspension"].unique()
Out[65]:
array(['Air', nan, 'MacPherson Strut', 'sophisticated Suspension'],
      dtype=object)
In [66]:
com=main.loc[main["Model"]=="Volvo"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [67]:
com.unique()
Out[67]:
array(['Air Front Suspension', nan, 'Mac Pherson Strut Front Suspension',
        'Sophisticated Front Suspension'], dtype=object)
In [68]:
#Applt the preprocess_Front_Suspention to all the brands
main["Front Suspension"]=main["Front Suspension"].apply(preprocess_Front_Suspension)
In [69]:
#preprocessed Front Suspension
len(main["Front Suspension"].unique())
Out[69]:
107
In [70]:
#Unpreprocessed Front Suspension
len(main_data["Front Suspension"].unique())
Out[70]:
170
In [71]:
main["Front Suspension"].unique()
Out[71]:
array(['Mac Pherson Strut Front Suspension',
        'Mac Pherson Strut & Coil Spring Front Suspension', nan,
        'Independent Mac Pherson Strut & Coil Spring & Anti-Roll Bar Front Suspension',
        'Independent Mac Pherson Strut With Dual Path Mounts Front Suspension',
        'Independent Mac Pherson Strut Front Suspension',
        'Independent Mac Pherson Strut & Coil Spring Front Suspension',
        'Double Ball Joint Mac Pherson Strut With Stabilizer Bar Front Suspension',
        'Independent Mac Pherson Strut & Offset & Coil Spring Front Suspension',
        'Independent Mac Pherson Strut & Offset & Coil Spring & Stabiliser Bar Front Suspension',
        'Independent Double Wishbone & Torsion Bar Spring & Stabilizer Bar Front Suspension',
        'Double Wishbone Front Suspension', 'Air Front Suspension',
        'Underbody Guard & Heavy-Duty Front Suspension',
        'Five-Link Front Suspension With Tubular Anti-Roll Bar',
        'Rs Sports Front Suspension',
        'Five-Link Front Axle With Tubular Anti-Roll Bar With Air Spring Suspension',
        'Sport Adaptive Air Front Suspension',
        'Adaptive Air Front Suspension',
        'Rs Sports Front Suspension Plus & Drc',
        'S Sports Front Suspension',
        'Four Link Double Wishbones Front Suspension',
        'Adaptive 2-Axle Air Front Suspension',
        'Adaptive M-Specific Front Suspension',
        'Adaptive M Front Suspension',
        'Double Joint Spring Strut Front Suspension',
        'Single-Joint Spring Strut Axle In Lightweight Aluminium-Steel Construction Front Suspension',
        'M Sport Front Suspension', 'Independent Damping Front Suspension',
        'Airmatic Front Suspension',
        'Adaptive Front Suspension & Variable Shock Absorber',
        'Adaptive Front Suspension', 'Air-Front Suspension',
        'Independent Double Wishbones Front Suspension',
        'Independent Double Wishbone & Coil Spring Front Suspension',
        'Independent Double Wishbones(Adaptive Dampers) Front Suspension System',
        'Magnetorheological Damper Front Suspension',
        'Adaptive Magnetic Front Suspension',
        'Independent Double Wishbone Coil Spring Gas Shock Absorbers Stabiliser Bar Front Suspension',
        'Independent Double Wishbone Coil Spring Front Suspension',
        'Double Wishbone Coil Spring Front Suspension',
        'Mac Pherson Strut With Lower Control Arm Front Suspension',
        'Mac Pherson Strut With Frequency Selective Damping Hrs With Anti Roll Bar Front Suspension',
        'Independent Double Wishbone Front Suspension',
        'The Magneto Rheological Front Suspension',
        'Aluminum Double-Wishbone Front Suspension',
        'Push Rod Magneto-Rheologic Active & Horizontal Dampers Front Suspension',
        'Electronic Air Front Suspension',
        'Double Wishbones Coil Front Suspension',
        'Multi-Link Type Coil Spring Front Suspension',
        'Air Adaptive Variable Front Suspension',
        'Double Wishbone & Stabilizer Front Suspension',
        'Mac Pherson Strut With Anti-Roll Bar Front Suspension',
        'Double Wishbone Front Suspension & Coil Over Shocks & Fdd & Mtv-Cl',
        'Double Wishbone & Coil Spring Front Suspension',
        'Ifs Coil Spring Front Suspension',
        'Mac Pherson Strut Independent Front Suspension With Fsd And Stabilizer Bar',
        'Independent Mac Pherson Strut & Dual Path Mounts & Coil Spring Front Suspension',
        'Independent Double Wishbone Front Suspension & Coil Over Damper & Stabiliser Bar',
        'Rigid Axle & Leaf Spring Front Suspension',
        'Mac Pherson Type With Wishbone Link Front Suspension',
        'Double Wish-Bone Type Independent Front Coil Spring Suspension',
        'Independent Double Wishbone Design Coil Spring Anti-Roll Bar & Adaptive Damping Front Suspension System',
        'Independent Double Wishbone Coil Spring Anti-Roll Bar & Adaptive Dampers Front Suspension System',
        'Air Adaptive Front Suspensions',
        'Quattroporte Sport Gt S Is Fitted & The Single-Setting Racing-Style Front Suspension System',
        'Amg Front Suspension', 'Amg Ride Control Sports Front Suspension',
        'Adaptive Damping Front Suspension System',
        'Amg Ride Control+ Front Suspension',
        'Four-Link Axle Front Suspension',
        'Front Suspension & Adaptive Damping System',
        'Amg Ride Control Front Suspension',
        'Active Roll Stabilization Intelligent Front Suspension',
        'Agility Control Front Suspension',
        'Dynamic Body Control Front Suspension',
        'Independent Front Suspension',
        'Rigid Leaf Spring Front Suspension',
        'Mac Pherson Strut With Stabilizer Bar Front Suspension',
        'Dual Helix Independent Front Suspension',
        'Single Joint Spring-Strut Front Axle Suspension',
        'Sport Front Suspension',
        'Single-Link Spring-Strut Front Suspension',
        'Mac Pherson Strut & Coil Spring & Stabilizer Bar Front Suspension',
        'Double Wishbone Torsion Bar & Stabiliser Bar Front Suspension',
        'Independent Double Wishbone Coil Spring & Stabilizer Bar Front Suspension',
        'Mac Pherson Strut & Bilstein Shock Absorbers Eibach Spring & Stabilizer Bar Front Suspension',
        'Mac Pherson Strut With Lower Transverse Link Front Suspension',
        'Aluminum Double-Wishbone Independent Wheel Front Suspension',
        'Mac Pherson Spring-Strut Front Suspension',
        'Active Front Suspension', 'Spring-Strut Front Suspension',
        'Aluminium Double-Wishbone Front Axle Suspension',
        'Lightweight Spring-Strut Front Suspension',
        'Adaptive Air Front Suspension Including Porsche Active',
        'Mac Pherson Strut & Lower Triangle & Coil Spring Front Suspension',
        'Double Wishbone Front Axle Suspension',
        'Mac Pherson With Lower Triangular Links And Stabiliser Bar Front Suspension',
        'Mac Pherson With Lower Triangular Links And Torsion Stabiliser Front Suspension',
        'Independent Lower Wishbone Mac Pherson Strut & Coil Spring & Anti Roll Bar Front Suspension',
        'Independent Lower Wishbone Mac Pherson Strut & Coil Spring Front Suspension',
        'Independent Lower Wishbone Mac Pherson Dual Path Strut Front Suspension',
        'Independent Mac Pherson Dual Path Strut & Coil Spring Front Suspension',
        'Semi-Elliptical Leaf Spring-6Leaves Front Suspension',
        'Double Wishbone & Torsion Bar Front Suspension',
        'Independent Front Suspension & Coil Spring',
        'Sophisticated Front Suspension'], dtype=object)
In [73]:
main["Rear Suspension"]
Out[73]:
0      Torsion Beam
1               NaN
            ...     
548             NaN
549             NaN
Name: Rear Suspension, Length: 160031, dtype: object
In [74]:
main["Rear Suspension"].unique()
Out[74]:
array(['Torsion Beam', nan, 'Torsion Beam & coil spring',
        'Torsion Beam with Coil Spring', '3-Link Rigid Axle',
        'Torsion Beam & Coil Spring', 'Leaf Spring Rigid Axle',
        'Semi-independent twist beam with twin gas and oil filled shock absorbers',
        'Semi Independent Twist Beam, Coil Springs',
        'Semi Independent Twist Beam',
        'Heavy duty twist-beam with strut-type coil spring/damper units',
        'Semi-independent twist beam',
        'Semi Independent (Twist Beam Type)',
        'Coil Spring with Anti Roll Bar', 'Twist Beam',
        'Semi-independent twist beam with twin shock absorbers filled with gas & oil',
        'Semi-Independent Twist Beam', 'Semi Independent',
        'Integral Link Independent with Coil Springs & Stabilizer Bar',
        'Semi-Independent heavy duty twist-beam with coil springs',
        'Progressive Linear Rate Leaf Springs With Low Friction Pads',
        'Semi-Independent Twist Beam With Twin Shock Absorbers Filled With Gas & Oil',
        'Coil Spring, Watts Linkage Type with Anti-roll Bar',
        'Progessive Linear Rate Leaf Springs With Low Friction Pads',
        'Semi-independent Twist Beam With Twin Shock Absorbers Filled With Gas & Oil',
        'Leaf Spring',
        'Progressive linear rate leaf springs with low friction pads',
        'Semi-Independent Twist Beam With Twin Shock Absorbers filled with gas & oil',
        'Air Suspension', '4-link',
        'Five-link front suspension; tubular anti-roll bar', 'RS Sports',
        'Five-link front axle; tubular anti-roll bar; air spring suspension',
        'Sport Adaptive Air Suspension', 'Adaptive Air Suspension',
        'RS Adaptive Air Suspension', 'S Sports suspension',
        'Trapezoidak muliti-Link', 'Air suspension',
        'Adaptive 2-axle Air Suspension', 'Adaptive M-specific Suspension',
        'Adaptive M suspension', 'Five Arm', 'Dynamic Damper Control',
        'M Sport Suspension', 'independent damping', 'AIRMATIC suspension',
        'Adaptive air suspension',
        'Adaptive Suspension with variable shock absorber',
        'Adaptive Suspension', 'Adaptive M-Specific Suspension',
        'Adaptive M Suspension', 'Air-Suspension',
        'Independent Double wishbones',
        'Multi-link with Pan hard rod & Coil Spring',
        'independent,multi-link(Adaptive Dampers)',
        'Magnetorheological damper', 'adaptive magnetic suspension',
        'independent, multi-link(Adaptive Dampers)',
        'Torsion Beam Axle,Coil Spring',
        'Twisted Torsion Beam, Coil Spring', 'Torsion Bar, Coil Spring',
        'Torsion beam with coil spring', 'Torsion Beam Axle, Coil Spring',
        'Torsion Beam axle, Coil Spring', 'Coupled torsion beam axle',
        'Coupled Torsion Beam Axle',
        'Coupled torsion beam axle with coil spring',
        'Multi-link with coil spring',
        'Penta-link coil suspension gas shock absorbers stabiliser bar',
        'Soft ride,Leaf Spring', 'Semi-Elliptic Leaf Spring',
        'Soft Ride, Leaf Spring',
        'Multi Link Suspension with Strut Assembly',
        'Multi-Link with Strut Suspension with FSD, with Anti Roll Bar',
        'heavy duty with gas shocks', 'Multi Link Suspension',
        'Coupled Torsion Beam Axle with Coil Spring', 'Multi Link',
        'Multi-Link', 'Aluminum double-wishbone suspension',
        'Push rod magneto-rheologic active with horizontal dampers',
        'adaptive air suspension', 'Multi-link',
        'Electronic Air Suspension', 'Integral Coil Spring',
        'Double-wishbone', 'multi-link suspension',
        'AIR Adaptive Variable Suspension', 'Double Wishbone Suspension',
        'Double Wishbone', '4-link Type with Coil Springs',
        'Twist beam suspension with Coil Spring',
        'Pentalink Suspension with WATT’s Linkage with FDD & MTV-CL',
        '5 Link Rear Suspension with Coil Spring', 'Rigid leaf Spring',
        'Multi-Link Independent Suspension with FSD Stabilizer bar',
        'Semi-independent Twist Beam with Coil Spring',
        'Multilink Solid Rear Axle with Coil Over Damper & Stabiliser Bar',
        'Rigid axle with leaf spring',
        'H-Section Torsion Beam with Coil Spring',
        'Multi Link Coil Spring Suspension and Anti-roll Bar',
        'multi-link, coil springs, anti-roll bar and adaptive damping Adaptive Damping System',
        'Multi-link, coil springs, anti-roll bar and adaptive dampers  Adaptive Damping System',
        'Air Adaptive Suspensions', 'Five-Arm Multilink',
        'Quattroporte Sport GT S is fitted with the single-setting racing-style suspension system',
        'AIRMATIC', 'AMG Suspension', 'AMG RIDE CONTROL sports suspension',
        'Adaptive Damping System', 'AMG RIDE CONTROL+',
        'five-link multi-link independent suspension',
        'SUSPENSION WITH  ADAPTIVE DAMPING SYSTEM', 'ADAPTIVE DAMPING',
        'AMG RIDE CONTROL suspension', 'air suspension',
        'active roll stabilization intelligent suspension',
        'Agility control', 'AMG RIDE CONTROL',
        'DYNAMIC BODY CONTROL suspension', 'Coil spring',
        'Rigid Leaf Spring', 'adaptive damping Suspension',
        'Semi Independent Helical Spring Torison Beam',
        'Semi Independent Helical Spring Torsion Beam',
        'Five Link Integral Suspension', 'multiple control-arm rear axle',
        'Sport Suspension', 'Multiple-Control-Arm',
        'Independent Multi-link with Stabilizer bar',
        'Multi-Link Coil Spring with Stablizer Bar',
        '3 Link coil spring rigid axle with stabiliser bar',
        'Multi-link coil springs with stabilizer bar',
        'Independent Multi-link with stabilizer bar',
        'Multi-Link With Bilstein Shock Absorbers,Eibach Springs & Stabilizer Bar',
        'Multi-Link Coil Springs With Stabilizer', '3 Link',
        'Multi link with coil springs and stabilizer bar',
        '3 Link Coil Spring Suspension',
        'Twin tube telescopic shock absorber',
        'Torsion Beam with Coil Springs',
        'Aluminum multi-link axle with subframe, independent wheel suspension',
        'Active Suspension', 'spring-strut suspension',
        'Aluminium multi-link rear axle', 'Self-Tracking Trapezoidal Link',
        'Lightweight spring-strut suspension',
        'Twist beam suspension with coil spring', 'Torsion beam axle',
        'multi-link rear axle', 'Twist Beam Axle',
        'Multi-element axle, with longitudinal and transverse links, with torsion stabiliser',
        'Multilink suspension, one longitudinal and three transverse arms',
        'Multi-element axle, with one longitudinal and transverse links, with torsion stabiliser',
        'Semi Independent Twist Blade with Panhard Rod & Coil Spring',
        'Semi-Independent closed profile Twist beam with Coil Spring and shock absorber',
        'Twist Beam with Coil Spring',
        'Semi-independent Twist Beam With Coil Spring And Shock Absorber',
        'Twist beam with coil spring and shock absorber',
        'Semi-independent; Rear Twist Beam with Dual path Strut',
        'Semi Independent Twist Blade with Panhard Rod and Coil Spring',
        'Twist beam with dual path Strut',
        'Twist beam with dual path strut',
        'Twist Beam with Coil Spring and Shock Absorber',
        'Innovative Two-stage semi-elliptical leaf springs',
        'Semi-independent Closed Profile Twist Beam with Dual Path Strut',
        'Semi-Independent Closed Profile Twist Beam with Dual Path Strut',
        'Innovative Two-stage Semi-elliptical leaf springs-7leaves',
        'Rear Twist Beam with Coil Spring',
        'Twist Beam with Coil Spring and Shock Absorberf',
        '4-Link With Coil Spring', 'leaf spring',
        '4-Link with Coil Spring', 'Twist beam axle',
        'Semi Indpendent Trailing Arm', 'Twist beam axle`',
        'Independent suspension by four-link axle', 'Air',
        'sophisticated Suspension'], dtype=object)
In [75]:
def preprocess_Rear_Suspension(text):
    text=str(text)
    list_of_symbols_tonull_out=["Type","(",")","Front"]
    if text=="nan":
        return np.nan
    text=text.title()
    if "&" in text:
        text=text.replace("&","With")
    if "," in text:
        w_index=text.find(",")
        if text[w_index+2]=="W":
            text=text.replace(",","")
        elif text[w_index+1]!=" " and text[w_index-1]!=" ":
            text=text.replace(","," With ")
        else:
            text=text.replace(","," With")
    if "With" in text:
        with_index=text.find("With")
        if text[with_index+4]!=" ":
            text=text[:with_index+4]+" "+text[with_index+4:]
    if "-" in text:
        text=text.replace("-"," ")
    for i in list_of_symbols_tonull_out:
        if i=="Type" and "(" not in text and ")" not in text:
            continue
        front_index=text.find("Front")
        try:
            if text[front_index+6]=="S":
                text=text.replace("{}".format(i),"")
                continue
        except:
            pass
        if i=="(":
            text=text.replace("{}".format(i)," ") 
        else:
            text=text.replace("{}".format(i),"")
    if "One" in text:
        text=text.replace("One","")
    if "  " in text:
        text=text.replace("  "," ")
    if ";" in text:
        text=text.replace(";"," With")
    if "Adaptive Damp" in text:
        for_index=text.find("Adaptive Damp")
        bac_index=text.rfind("Adaptive Damp")
        if for_index!=bac_index:
            text=text[:for_index+17]
        if "Damper" in text:
            text=text.replace("Dampers","Damping")
    if text[len(text)-1]==" ":
        text=text[:len(text)-1]
    if "Torison" in text:
        text=text.replace("Torison","Torsion")
    if "Link" in text and "Coil" in text:
        link_index=text.find("Link")
        if text[link_index+5]=="W":
            text=text[:link_index+4]+text[link_index+9:]
    if "Springs" in text:
        text=text.replace("Springs","Spring")
    if "Stablizer" in text:
        text=text.replace("Stablizer","Stabilizer")
    if "Stabilizer" in text:
        if "Bar" not in text:
            sta_index=text.find("Stabilizer")
            text=text[:sta_index+10]+" Bar"+text[sta_index+10:]
        if "Spring" in text:
            sta_index=text.find("Spring")
            text=text[:sta_index+6]+" With"+text[sta_index+10:]
    if "Withh" in text:
        text=text.replace("Withh","With")
    if "Absorberf" in text:
        text=text.replace("Absorberf","Absorber")
    if "`" in text:
        text=text.replace("`","")
    if "Rear" not in text:
        if "Suspension" not in text:
            text=text+" Rear Suspension"
        else:
            find_=text.find("Suspension")
            text=text[:find_]+"Rear "+text[find_:]
    else:
        text=text+" Suspension"
    return text
In [76]:
txt="independent,multi-link(Adaptive Dampers)"
In [77]:
preprocess_Rear_Suspension(txt)
Out[77]:
'Independent With Multi Link Adaptive Damping Rear Suspension'
In [78]:
#Maruti
In [79]:
main.loc[main["Model"]=="Maruti"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[79]:
array(['Torsion Beam', nan, 'Torsion Beam & coil spring',
        'Torsion Beam with Coil Spring', '3-Link Rigid Axle',
        'Torsion Beam & Coil Spring', 'Leaf Spring Rigid Axle'],
      dtype=object)
In [80]:
com=main.loc[main["Model"]=="Maruti"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [81]:
com.unique()
Out[81]:
array(['Torsion Beam Rear Suspension', nan,
        'Torsion Beam With Coil Spring Rear Suspension',
        '3 Link Rigid Axle Rear Suspension',
        'Leaf Spring Rigid Axle Rear Suspension'], dtype=object)
In [82]:
#Ford
In [83]:
main.loc[main["Model"]=="Ford"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[83]:
array(['Semi-independent twist beam with twin gas and oil filled shock absorbers',
        'Semi Independent Twist Beam, Coil Springs',
        'Semi Independent Twist Beam',
        'Heavy duty twist-beam with strut-type coil spring/damper units',
        'Semi-independent twist beam',
        'Semi Independent (Twist Beam Type)',
        'Coil Spring with Anti Roll Bar', 'Twist Beam',
        'Semi-independent twist beam with twin shock absorbers filled with gas & oil',
        nan, 'Semi-Independent Twist Beam', 'Semi Independent',
        'Integral Link Independent with Coil Springs & Stabilizer Bar',
        'Semi-Independent heavy duty twist-beam with coil springs',
        'Progressive Linear Rate Leaf Springs With Low Friction Pads',
        'Semi-Independent Twist Beam With Twin Shock Absorbers Filled With Gas & Oil',
        'Coil Spring, Watts Linkage Type with Anti-roll Bar',
        'Progessive Linear Rate Leaf Springs With Low Friction Pads',
        'Semi-independent Twist Beam With Twin Shock Absorbers Filled With Gas & Oil',
        'Leaf Spring',
        'Progressive linear rate leaf springs with low friction pads',
        'Semi-Independent Twist Beam With Twin Shock Absorbers filled with gas & oil'],
      dtype=object)
In [84]:
com=main.loc[main["Model"]=="Ford"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [85]:
com.unique()
Out[85]:
array(['Semi Independent Twist Beam With Twin Gas And Oil Filled Shock Absorbers Rear Suspension',
        'Semi Independent Twist Beam With Coil Spring Rear Suspension',
        'Semi Independent Twist Beam Rear Suspension',
        'Heavy Duty Twist Beam With Strut Type Coil Spring/Damper Units Rear Suspension',
        'Coil Spring With Anti Roll Bar Rear Suspension',
        'Twist Beam Rear Suspension',
        'Semi Independent Twist Beam With Twin Shock Absorbers Filled With Gas With Oil Rear Suspension',
        nan, 'Semi Independent Rear Suspension',
        'Integral Link Independent With Coil Spring With Stabilizer Bar Rear Suspension',
        'Semi Independent Heavy Duty Twist Beam With Coil Spring Rear Suspension',
        'Progressive Linear Rate Leaf Spring With Low Friction Pads Rear Suspension',
        'Coil Spring Watts Linkage Type With Anti Roll Bar Rear Suspension',
        'Progessive Linear Rate Leaf Spring With Low Friction Pads Rear Suspension',
        'Leaf Spring Rear Suspension'], dtype=object)
In [86]:
#Audi
In [87]:
main.loc[main["Model"]=="Audi"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[87]:
array(['Air Suspension', '4-link', nan,
        'Five-link front suspension; tubular anti-roll bar', 'RS Sports',
        'Five-link front axle; tubular anti-roll bar; air spring suspension',
        'Sport Adaptive Air Suspension', 'Adaptive Air Suspension',
        'RS Adaptive Air Suspension', 'S Sports suspension'], dtype=object)
In [88]:
com=main.loc[main["Model"]=="Audi"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [89]:
com.unique()
Out[89]:
array(['Air Rear Suspension', '4 Link Rear Suspension', nan,
        'Five Link Rear Suspension With Tubular Anti Roll Bar',
        'Rs Sports Rear Suspension',
        'Five Link Axle With Tubular Anti Roll Bar With Air Spring Rear Suspension',
        'Sport Adaptive Air Rear Suspension',
        'Adaptive Air Rear Suspension', 'Rs Adaptive Air Rear Suspension',
        'S Sports Rear Suspension'], dtype=object)
In [90]:
#Bentley
In [91]:
main.loc[main["Model"]=="Bentley"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[91]:
array(['Air Suspension', 'Trapezoidak muliti-Link', 'Air suspension'],
      dtype=object)
In [92]:
com=main.loc[main["Model"]=="Bentley"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [93]:
com.unique()
Out[93]:
array(['Air Rear Suspension', 'Trapezoidak Muliti Link Rear Suspension'],
      dtype=object)
In [94]:
#Force
In [95]:
main.loc[main["Model"]=="Force"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[95]:
array(['Multi-link with Pan hard rod & Coil Spring'], dtype=object)
In [96]:
com=main.loc[main["Model"]=="Force"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [97]:
com.unique()
Out[97]:
array(['Multi Link Pan Hard Rod With Coil Spring Rear Suspension'],
      dtype=object)
In [98]:
#Ferrari
In [99]:
main.loc[main["Model"]=="Ferrari"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[99]:
array([nan, 'independent,multi-link(Adaptive Dampers)',
        'Magnetorheological damper', 'adaptive magnetic suspension',
        'independent, multi-link(Adaptive Dampers)'], dtype=object)
In [100]:
com=main.loc[main["Model"]=="Ferrari"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [101]:
com.unique()
Out[101]:
array([nan,
        'Independent With Multi Link Adaptive Damping Rear Suspension',
        'Magnetorheological Damper Rear Suspension',
        'Adaptive Magnetic Rear Suspension'], dtype=object)
In [102]:
#Honda
In [103]:
main.loc[main["Model"]=="Honda"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[103]:
array(['Torsion Beam Axle,Coil Spring', 'Torsion Beam with Coil Spring',
        'Twisted Torsion Beam, Coil Spring', 'Torsion Bar, Coil Spring',
        'Torsion beam with coil spring', 'Torsion Beam Axle, Coil Spring',
        'Torsion Beam axle, Coil Spring'], dtype=object)
In [104]:
com=main.loc[main["Model"]=="Honda"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [105]:
com.unique()
Out[105]:
array(['Torsion Beam Axle With Coil Spring Rear Suspension',
        'Torsion Beam With Coil Spring Rear Suspension',
        'Twisted Torsion Beam With Coil Spring Rear Suspension',
        'Torsion Bar With Coil Spring Rear Suspension'], dtype=object)
In [106]:
#Hyundai
In [107]:
main.loc[main["Model"]=="Hyundai"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[107]:
array(['Coupled torsion beam axle', 'Coupled Torsion Beam Axle',
        'Coupled torsion beam axle with coil spring',
        'Multi-link with coil spring'], dtype=object)
In [108]:
com=main.loc[main["Model"]=="Hyundai"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [109]:
com.unique()
Out[109]:
array(['Coupled Torsion Beam Axle Rear Suspension',
        'Coupled Torsion Beam Axle With Coil Spring Rear Suspension',
        'Multi Link Coil Spring Rear Suspension'], dtype=object)
In [110]:
#Isuzu
In [111]:
main.loc[main["Model"]=="Isuzu"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[111]:
array(['Penta-link coil suspension gas shock absorbers stabiliser bar',
        'Soft ride,Leaf Spring', 'Semi-Elliptic Leaf Spring',
        'Soft Ride, Leaf Spring'], dtype=object)
In [112]:
com=main.loc[main["Model"]=="Isuzu"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [113]:
com.unique()
Out[113]:
array(['Penta Link Coil Rear Suspension Gas Shock Absorbers Stabiliser Bar',
        'Soft Ride With Leaf Spring Rear Suspension',
        'Semi Elliptic Leaf Spring Rear Suspension'], dtype=object)
In [114]:
#Jeep
In [115]:
main.loc[main["Model"]=="Jeep"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[115]:
array(['Multi Link Suspension with Strut Assembly',
        'Multi-Link with Strut Suspension with FSD, with Anti Roll Bar',
        'heavy duty with gas shocks', nan, 'Multi Link Suspension'],
      dtype=object)
In [116]:
com=main.loc[main["Model"]=="Jeep"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [117]:
com.unique()
Out[117]:
array(['Multi Link Rear Suspension With Strut Assembly',
        'Multi Link With Strut Rear Suspension With Fsd With Anti Roll Bar',
        'Heavy Duty With Gas Shocks Rear Suspension', nan,
        'Multi Link Rear Suspension'], dtype=object)
In [118]:
#Kia
In [119]:
main.loc[main["Model"]=="Kia"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[119]:
array(['Coupled Torsion Beam Axle with Coil Spring',
        'Coupled Torsion Beam Axle', 'Multi Link', 'Multi-Link'],
      dtype=object)
In [120]:
com=main.loc[main["Model"]=="Kia"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [121]:
com.unique()
Out[121]:
array(['Coupled Torsion Beam Axle With Coil Spring Rear Suspension',
        'Coupled Torsion Beam Axle Rear Suspension',
        'Multi Link Rear Suspension'], dtype=object)
In [122]:
#Lamborghini
In [123]:
main.loc[main["Model"]=="Lamborghini"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[123]:
array([nan, 'Aluminum double-wishbone suspension',
        'Push rod magneto-rheologic active with horizontal dampers',
        'adaptive air suspension'], dtype=object)
In [124]:
com=main.loc[main["Model"]=="Lamborghini"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [125]:
com.unique()
Out[125]:
array([nan, 'Aluminum Double Wishbone Rear Suspension',
        'Push Rod Magneto Rheologic Active With Horizontal Dampers Rear Suspension',
        'Adaptive Air Rear Suspension'], dtype=object)
In [126]:
#Land_Rover
In [127]:
main.loc[main["Model"]=="Land_Rover"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[127]:
array(['Multi-link', nan, 'Electronic Air Suspension',
        'Integral Coil Spring'], dtype=object)
In [128]:
com=main.loc[main["Model"]=="Land_Rover"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [129]:
com.unique()
Out[129]:
array(['Multi Link Rear Suspension', nan,
        'Electronic Air Rear Suspension',
        'Integral Coil Spring Rear Suspension'], dtype=object)
In [130]:
#Lexus
In [131]:
main.loc[main["Model"]=="Lexus"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[131]:
array(['Double-wishbone', 'multi-link suspension',
        'AIR Adaptive Variable Suspension', 'Double Wishbone Suspension',
        'Double Wishbone', '4-link Type with Coil Springs'], dtype=object)
In [132]:
com=main.loc[main["Model"]=="Lexus"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [133]:
com.unique()
Out[133]:
array(['Double Wishbone Rear Suspension', 'Multi Link Rear Suspension',
        'Air Adaptive Variable Rear Suspension',
        '4 Link Type With Coil Spring Rear Suspension'], dtype=object)
In [134]:
#Mahindra
In [135]:
main.loc[main["Model"]=="Mahindra"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[135]:
array(['Twist beam suspension with Coil Spring',
        'Pentalink Suspension with WATT’s Linkage with FDD & MTV-CL',
        '5 Link Rear Suspension with Coil Spring', 'Rigid leaf Spring',
        'Multi-Link Independent Suspension with FSD Stabilizer bar',
        'Semi-independent Twist Beam with Coil Spring',
        'Multilink Solid Rear Axle with Coil Over Damper & Stabiliser Bar',
        nan, 'Rigid axle with leaf spring',
        'H-Section Torsion Beam with Coil Spring',
        'Multi Link Coil Spring Suspension and Anti-roll Bar',
        'Twist Beam'], dtype=object)
In [136]:
com=main.loc[main["Model"]=="Mahindra"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [137]:
com.unique()
Out[137]:
array(['Twist Beam Rear Suspension With Coil Spring',
        'Pentalink Rear Suspension With Watt’S Linkage With Fdd With Mtv Cl',
        '5 Link Rear Suspension With Coil Spring Suspension',
        'Rigid Leaf Spring Rear Suspension',
        'Multi Link Independent Rear Suspension With Fsd Stabilizer Bar',
        'Semi Independent Twist Beam With Coil Spring Rear Suspension',
        'Multilink Solid Rear Axle With Coil Over Damper With Stabiliser Bar Suspension',
        nan, 'Rigid Axle With Leaf Spring Rear Suspension',
        'H Section Torsion Beam With Coil Spring Rear Suspension',
        'Multi Link Coil Spring Rear Suspension And Anti Roll Bar',
        'Twist Beam Rear Suspension'], dtype=object)
In [138]:
#Aston_Martin
In [139]:
main.loc[main["Model"]=="Aston_Martin"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[139]:
array(['multi-link, coil springs, anti-roll bar and adaptive damping Adaptive Damping System',
        'Multi-link, coil springs, anti-roll bar and adaptive dampers  Adaptive Damping System',
        'Multi-link'], dtype=object)
In [140]:
com=main.loc[main["Model"]=="Aston_Martin"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [141]:
com.unique()
Out[141]:
array(['Multi Link Coil Spring With Anti Roll Bar And Adaptive Damping Rear Suspension',
        'Multi Link Rear Suspension'], dtype=object)
In [142]:
#Maserati
In [143]:
main.loc[main["Model"]=="Maserati"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[143]:
array(['Air Adaptive Suspensions', 'Five-Arm Multilink', nan,
        'Quattroporte Sport GT S is fitted with the single-setting racing-style suspension system'],
      dtype=object)
In [144]:
com=main.loc[main["Model"]=="Maserati"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [145]:
com.unique()
Out[145]:
array(['Air Adaptive Rear Suspensions',
        'Five Arm Multilink Rear Suspension', nan,
        'Quattroporte Sport Gt S Is Fitted With The Single Setting Racing Style Rear Suspension System'],
      dtype=object)
In [146]:
#Mercedes-Benz
In [147]:
main.loc[main["Model"]=="Mercedes-Benz"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[147]:
array(['AIRMATIC', 'Air Suspension', 'AMG Suspension', nan,
        'AMG RIDE CONTROL sports suspension', 'Adaptive Damping System',
        'AMG RIDE CONTROL+', 'five-link multi-link independent suspension',
        'SUSPENSION WITH  ADAPTIVE DAMPING SYSTEM', 'ADAPTIVE DAMPING',
        'AMG RIDE CONTROL suspension', 'air suspension',
        'active roll stabilization intelligent suspension',
        'AIRMATIC suspension', 'Agility control',
        'Adaptive Air Suspension', 'AMG RIDE CONTROL',
        'DYNAMIC BODY CONTROL suspension', 'Coil spring',
        'Rigid Leaf Spring', 'adaptive damping Suspension'], dtype=object)
In [148]:
com=main.loc[main["Model"]=="Mercedes-Benz"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [149]:
com.unique()
Out[149]:
array(['Airmatic Rear Suspension', 'Air Rear Suspension',
        'Amg Rear Suspension', nan,
        'Amg Ride Control Sports Rear Suspension',
        'Adaptive Damping System Rear Suspension',
        'Amg Ride Control+ Rear Suspension',
        'Five Link Multi Link Independent Rear Suspension',
        'Rear Suspension With Adaptive Damping System',
        'Adaptive Damping Rear Suspension',
        'Amg Ride Control Rear Suspension',
        'Active Roll Stabilization Intelligent Rear Suspension',
        'Agility Control Rear Suspension', 'Adaptive Air Rear Suspension',
        'Dynamic Body Control Rear Suspension',
        'Coil Spring Rear Suspension', 'Rigid Leaf Spring Rear Suspension'],
      dtype=object)
In [150]:
#MG
In [151]:
main.loc[main["Model"]=="MG"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[151]:
array(['Semi Independent Helical Spring Torison Beam',
        'Semi Independent Helical Spring Torsion Beam',
        'Five Link Integral Suspension', 'Torsion Beam'], dtype=object)
In [152]:
com=main.loc[main["Model"]=="MG"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [153]:
com.unique()
Out[153]:
array(['Semi Independent Helical Spring Torsion Beam Rear Suspension',
        'Five Link Integral Rear Suspension',
        'Torsion Beam Rear Suspension'], dtype=object)
In [154]:
#Mini
In [155]:
main.loc[main["Model"]=="Mini"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[155]:
array(['multiple control-arm rear axle', 'Sport Suspension', 'Multi Link',
        nan, 'Multiple-Control-Arm'], dtype=object)
In [156]:
com=main.loc[main["Model"]=="Mini"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [157]:
com.unique()
Out[157]:
array(['Multiple Control Arm Rear Axle Suspension',
        'Sport Rear Suspension', 'Multi Link Rear Suspension', nan,
        'Multiple Control Arm Rear Suspension'], dtype=object)
In [158]:
#Mitsubishi
In [159]:
main.loc[main["Model"]=="Mitsubishi"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[159]:
array(['Independent Multi-link with Stabilizer bar',
        'Multi-Link Coil Spring with Stablizer Bar',
        '3 Link coil spring rigid axle with stabiliser bar',
        'Multi-link coil springs with stabilizer bar',
        'Independent Multi-link with stabilizer bar', nan,
        'Multi-Link With Bilstein Shock Absorbers,Eibach Springs & Stabilizer Bar',
        'Multi-Link Coil Springs With Stabilizer', 'Multi-Link', '3 Link',
        'Multi link with coil springs and stabilizer bar',
        '3 Link Coil Spring Suspension'], dtype=object)
In [160]:
com=main.loc[main["Model"]=="Mitsubishi"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [161]:
com.unique()
Out[161]:
array(['Independent Multi Link With Stabilizer Bar Rear Suspension',
        'Multi Link Coil Spring With Stabilizer Bar Rear Suspension',
        '3 Link Coil Spring Rigid Axle With Stabiliser Bar Rear Suspension',
        nan,
        'Multi Link With Bilstein Shock Absorbers With Eibach Spring With Stabilizer Bar Rear Suspension',
        'Multi Link Rear Suspension', '3 Link Rear Suspension',
        '3 Link Coil Spring Rear Suspension'], dtype=object)
In [162]:
#Nissan
In [163]:
main.loc[main["Model"]=="Nissan"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[163]:
array(['Twin tube telescopic shock absorber',
        'Torsion Beam with Coil Springs', 'Multi Link'], dtype=object)
In [164]:
com=main.loc[main["Model"]=="Nissan"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [165]:
com.unique()
Out[165]:
array(['Twin Tube Telescopic Shock Absorber Rear Suspension',
        'Torsion Beam With Coil Spring Rear Suspension',
        'Multi Link Rear Suspension'], dtype=object)
In [166]:
#Porsche
In [167]:
main.loc[main["Model"]=="Porsche"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[167]:
array(['Aluminum multi-link axle with subframe, independent wheel suspension',
        'Multi-Link', 'Adaptive air suspension', 'Active Suspension',
        'spring-strut suspension', 'Aluminium multi-link rear axle',
        'Self-Tracking Trapezoidal Link',
        'Lightweight spring-strut suspension', nan], dtype=object)
In [168]:
com=main.loc[main["Model"]=="Porsche"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [169]:
com.unique()
Out[169]:
array(['Aluminum Multi Link Axle With Subframe With Independent Wheel Rear Suspension',
        'Multi Link Rear Suspension', 'Adaptive Air Rear Suspension',
        'Active Rear Suspension', 'Spring Strut Rear Suspension',
        'Aluminium Multi Link Rear Axle Suspension',
        'Self Tracking Trapezoidal Link Rear Suspension',
        'Lightweight Spring Strut Rear Suspension', nan], dtype=object)
In [170]:
#Renault
In [171]:
main.loc[main["Model"]=="Renault"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[171]:
array(['Twist beam suspension with coil spring', 'Torsion beam axle'],
      dtype=object)
In [172]:
com=main.loc[main["Model"]=="Renault"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [173]:
com.unique()
Out[173]:
array(['Twist Beam Rear Suspension With Coil Spring',
        'Torsion Beam Axle Rear Suspension'], dtype=object)
In [174]:
#Rolls-Royce
In [175]:
main.loc[main["Model"]=="Rolls-Royce"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[175]:
array(['Multi Link', nan, 'multi-link rear axle'], dtype=object)
In [176]:
com=main.loc[main["Model"]=="Rolls-Royce"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [177]:
com.unique()
Out[177]:
array(['Multi Link Rear Suspension', nan,
        'Multi Link Rear Axle Suspension'], dtype=object)
In [178]:
#Skoda
In [179]:
main.loc[main["Model"]=="Skoda"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[179]:
array(['Twist Beam Axle',
        'Multi-element axle, with longitudinal and transverse links, with torsion stabiliser',
        'Multilink suspension, one longitudinal and three transverse arms',
        'Multi-element axle, with one longitudinal and transverse links, with torsion stabiliser'],
      dtype=object)
In [180]:
com=main.loc[main["Model"]=="Skoda"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [181]:
com.unique()
Out[181]:
array(['Twist Beam Axle Rear Suspension',
        'Multi Element Axle With Longitudinal And Transverse Links With Torsion Stabiliser Rear Suspension',
        'Multilink Rear Suspension With Longitudinal And Three Transverse Arms'],
      dtype=object)
In [182]:
#Tata
In [183]:
main.loc[main["Model"]=="Tata"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[183]:
array(['Semi Independent Twist Blade with Panhard Rod & Coil Spring',
        'Semi-Independent closed profile Twist beam with Coil Spring and shock absorber',
        'Twist Beam with Coil Spring',
        'Semi-independent Twist Beam With Coil Spring And Shock Absorber',
        'Twist beam with coil spring and shock absorber',
        'Semi-independent; Rear Twist Beam with Dual path Strut',
        'Semi Independent Twist Blade with Panhard Rod and Coil Spring',
        'Twist beam with dual path Strut',
        'Twist beam with dual path strut',
        'Twist Beam with Coil Spring and Shock Absorber',
        'Innovative Two-stage semi-elliptical leaf springs',
        'Semi-independent Closed Profile Twist Beam with Dual Path Strut',
        'Semi-Independent Closed Profile Twist Beam with Dual Path Strut',
        'Innovative Two-stage Semi-elliptical leaf springs-7leaves',
        'Rear Twist Beam with Coil Spring',
        'Twist Beam with Coil Spring and Shock Absorberf', nan],
      dtype=object)
In [184]:
com=main.loc[main["Model"]=="Tata"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [185]:
com.unique()
Out[185]:
array(['Semi Independent Twist Blade With Panhard Rod With Coil Spring Rear Suspension',
        'Semi Independent Closed Profile Twist Beam With Coil Spring And Shock Absorber Rear Suspension',
        'Twist Beam With Coil Spring Rear Suspension',
        'Semi Independent Twist Beam With Coil Spring And Shock Absorber Rear Suspension',
        'Twist Beam With Coil Spring And Shock Absorber Rear Suspension',
        'Semi Independent With Rear Twist Beam With Dual Path Strut Suspension',
        'Semi Independent Twist Blade With Panhard Rod And Coil Spring Rear Suspension',
        'Twist Beam With Dual Path Strut Rear Suspension',
        'Innovative Two Stage Semi Elliptical Leaf Spring Rear Suspension',
        'Semi Independent Closed Profile Twist Beam With Dual Path Strut Rear Suspension',
        'Innovative Two Stage Semi Elliptical Leaf Spring 7Leaves Rear Suspension',
        'Rear Twist Beam With Coil Spring Suspension', nan], dtype=object)
In [186]:
#Toyota
In [187]:
main.loc[main["Model"]=="Toyota"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[187]:
array(['4-Link With Coil Spring', 'leaf spring',
        '4-Link with Coil Spring', 'Torsion Beam', nan, 'Double Wishbone'],
      dtype=object)
In [188]:
com=main.loc[main["Model"]=="Toyota"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [189]:
com.unique()
Out[189]:
array(['4 Link Coil Spring Rear Suspension',
        'Leaf Spring Rear Suspension', 'Torsion Beam Rear Suspension', nan,
        'Double Wishbone Rear Suspension'], dtype=object)
In [190]:
#Volkswagen
In [191]:
main.loc[main["Model"]=="Volkswagen"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[191]:
array(['Twist beam axle', 'Semi Indpendent Trailing Arm',
        'Twist beam axle`', 'Independent suspension by four-link axle'],
      dtype=object)
In [192]:
com=main.loc[main["Model"]=="Volkswagen"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [193]:
com.unique()
Out[193]:
array(['Twist Beam Axle Rear Suspension',
        'Semi Indpendent Trailing Arm Rear Suspension',
        'Independent Rear Suspension By Four Link Axle'], dtype=object)
In [194]:
#Volvo
In [195]:
main.loc[main["Model"]=="Volvo"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[195]:
array(['Air', nan, 'Multi Link', 'sophisticated Suspension'], dtype=object)
In [196]:
com=main.loc[main["Model"]=="Volvo"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [197]:
com.unique()
Out[197]:
array(['Air Rear Suspension', nan, 'Multi Link Rear Suspension',
        'Sophisticated Rear Suspension'], dtype=object)
In [198]:
#Apply this function to all brands
main["Rear Suspension"]=main["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [199]:
#preprocessed rear suspension
len(main["Rear Suspension"].unique())
Out[199]:
128
In [200]:
#Unpreprocessed rear suspension
len(main_data["Rear Suspension"].unique())
Out[200]:
180
In [201]:
main["Rear Suspension"].unique()
Out[201]:
array(['Torsion Beam Rear Suspension', nan,
        'Torsion Beam With Coil Spring Rear Suspension',
        '3 Link Rigid Axle Rear Suspension',
        'Leaf Spring Rigid Axle Rear Suspension',
        'Semi Independent Twist Beam With Twin Gas And Oil Filled Shock Absorbers Rear Suspension',
        'Semi Independent Twist Beam With Coil Spring Rear Suspension',
        'Semi Independent Twist Beam Rear Suspension',
        'Heavy Duty Twist Beam With Strut Type Coil Spring/Damper Units Rear Suspension',
        'Coil Spring With Anti Roll Bar Rear Suspension',
        'Twist Beam Rear Suspension',
        'Semi Independent Twist Beam With Twin Shock Absorbers Filled With Gas With Oil Rear Suspension',
        'Semi Independent Rear Suspension',
        'Integral Link Independent With Coil Spring With Stabilizer Bar Rear Suspension',
        'Semi Independent Heavy Duty Twist Beam With Coil Spring Rear Suspension',
        'Progressive Linear Rate Leaf Spring With Low Friction Pads Rear Suspension',
        'Coil Spring Watts Linkage Type With Anti Roll Bar Rear Suspension',
        'Progessive Linear Rate Leaf Spring With Low Friction Pads Rear Suspension',
        'Leaf Spring Rear Suspension', 'Air Rear Suspension',
        '4 Link Rear Suspension',
        'Five Link Rear Suspension With Tubular Anti Roll Bar',
        'Rs Sports Rear Suspension',
        'Five Link Axle With Tubular Anti Roll Bar With Air Spring Rear Suspension',
        'Sport Adaptive Air Rear Suspension',
        'Adaptive Air Rear Suspension', 'Rs Adaptive Air Rear Suspension',
        'S Sports Rear Suspension',
        'Trapezoidak Muliti Link Rear Suspension',
        'Adaptive 2 Axle Air Rear Suspension',
        'Adaptive M Specific Rear Suspension',
        'Adaptive M Rear Suspension', 'Five Arm Rear Suspension',
        'Dynamic Damper Control Rear Suspension',
        'M Sport Rear Suspension', 'Independent Damping Rear Suspension',
        'Airmatic Rear Suspension',
        'Adaptive Rear Suspension With Variable Shock Absorber',
        'Adaptive Rear Suspension',
        'Independent Double Wishbones Rear Suspension',
        'Multi Link Pan Hard Rod With Coil Spring Rear Suspension',
        'Independent With Multi Link Adaptive Damping Rear Suspension',
        'Magnetorheological Damper Rear Suspension',
        'Adaptive Magnetic Rear Suspension',
        'Torsion Beam Axle With Coil Spring Rear Suspension',
        'Twisted Torsion Beam With Coil Spring Rear Suspension',
        'Torsion Bar With Coil Spring Rear Suspension',
        'Coupled Torsion Beam Axle Rear Suspension',
        'Coupled Torsion Beam Axle With Coil Spring Rear Suspension',
        'Multi Link Coil Spring Rear Suspension',
        'Penta Link Coil Rear Suspension Gas Shock Absorbers Stabiliser Bar',
        'Soft Ride With Leaf Spring Rear Suspension',
        'Semi Elliptic Leaf Spring Rear Suspension',
        'Multi Link Rear Suspension With Strut Assembly',
        'Multi Link With Strut Rear Suspension With Fsd With Anti Roll Bar',
        'Heavy Duty With Gas Shocks Rear Suspension',
        'Multi Link Rear Suspension',
        'Aluminum Double Wishbone Rear Suspension',
        'Push Rod Magneto Rheologic Active With Horizontal Dampers Rear Suspension',
        'Electronic Air Rear Suspension',
        'Integral Coil Spring Rear Suspension',
        'Double Wishbone Rear Suspension',
        'Air Adaptive Variable Rear Suspension',
        '4 Link Type With Coil Spring Rear Suspension',
        'Twist Beam Rear Suspension With Coil Spring',
        'Pentalink Rear Suspension With Watt’S Linkage With Fdd With Mtv Cl',
        '5 Link Rear Suspension With Coil Spring Suspension',
        'Rigid Leaf Spring Rear Suspension',
        'Multi Link Independent Rear Suspension With Fsd Stabilizer Bar',
        'Multilink Solid Rear Axle With Coil Over Damper With Stabiliser Bar Suspension',
        'Rigid Axle With Leaf Spring Rear Suspension',
        'H Section Torsion Beam With Coil Spring Rear Suspension',
        'Multi Link Coil Spring Rear Suspension And Anti Roll Bar',
        'Multi Link Coil Spring With Anti Roll Bar And Adaptive Damping Rear Suspension',
        'Air Adaptive Rear Suspensions',
        'Five Arm Multilink Rear Suspension',
        'Quattroporte Sport Gt S Is Fitted With The Single Setting Racing Style Rear Suspension System',
        'Amg Rear Suspension', 'Amg Ride Control Sports Rear Suspension',
        'Adaptive Damping System Rear Suspension',
        'Amg Ride Control+ Rear Suspension',
        'Five Link Multi Link Independent Rear Suspension',
        'Rear Suspension With Adaptive Damping System',
        'Adaptive Damping Rear Suspension',
        'Amg Ride Control Rear Suspension',
        'Active Roll Stabilization Intelligent Rear Suspension',
        'Agility Control Rear Suspension',
        'Dynamic Body Control Rear Suspension',
        'Coil Spring Rear Suspension',
        'Semi Independent Helical Spring Torsion Beam Rear Suspension',
        'Five Link Integral Rear Suspension',
        'Multiple Control Arm Rear Axle Suspension',
        'Sport Rear Suspension', 'Multiple Control Arm Rear Suspension',
        'Independent Multi Link With Stabilizer Bar Rear Suspension',
        'Multi Link Coil Spring With Stabilizer Bar Rear Suspension',
        '3 Link Coil Spring Rigid Axle With Stabiliser Bar Rear Suspension',
        'Multi Link With Bilstein Shock Absorbers With Eibach Spring With Stabilizer Bar Rear Suspension',
        '3 Link Rear Suspension', '3 Link Coil Spring Rear Suspension',
        'Twin Tube Telescopic Shock Absorber Rear Suspension',
        'Aluminum Multi Link Axle With Subframe With Independent Wheel Rear Suspension',
        'Active Rear Suspension', 'Spring Strut Rear Suspension',
        'Aluminium Multi Link Rear Axle Suspension',
        'Self Tracking Trapezoidal Link Rear Suspension',
        'Lightweight Spring Strut Rear Suspension',
        'Torsion Beam Axle Rear Suspension',
        'Multi Link Rear Axle Suspension',
        'Twist Beam Axle Rear Suspension',
        'Multi Element Axle With Longitudinal And Transverse Links With Torsion Stabiliser Rear Suspension',
        'Multilink Rear Suspension With Longitudinal And Three Transverse Arms',
        'Semi Independent Twist Blade With Panhard Rod With Coil Spring Rear Suspension',
        'Semi Independent Closed Profile Twist Beam With Coil Spring And Shock Absorber Rear Suspension',
        'Twist Beam With Coil Spring Rear Suspension',
        'Semi Independent Twist Beam With Coil Spring And Shock Absorber Rear Suspension',
        'Twist Beam With Coil Spring And Shock Absorber Rear Suspension',
        'Semi Independent With Rear Twist Beam With Dual Path Strut Suspension',
        'Semi Independent Twist Blade With Panhard Rod And Coil Spring Rear Suspension',
        'Twist Beam With Dual Path Strut Rear Suspension',
        'Innovative Two Stage Semi Elliptical Leaf Spring Rear Suspension',
        'Semi Independent Closed Profile Twist Beam With Dual Path Strut Rear Suspension',
        'Innovative Two Stage Semi Elliptical Leaf Spring 7Leaves Rear Suspension',
        'Rear Twist Beam With Coil Spring Suspension',
        '4 Link Coil Spring Rear Suspension',
        'Semi Indpendent Trailing Arm Rear Suspension',
        'Independent Rear Suspension By Four Link Axle',
        'Sophisticated Rear Suspension'], dtype=object)
In [202]:
main.columns[36]
Out[202]:
'Petrol Mileage (ARAI)'
In [203]:
main["Steering Type"]
Out[203]:
0      Electric
1        Manual
          ...   
548         NaN
549         NaN
Name: Steering Type, Length: 160031, dtype: object
In [204]:
main["Steering Type"].unique()
Out[204]:
array(['Electric', 'Manual', nan, 'Power', 'MT', 'EPAS', 'Electrical',
        'electric', 'power', 'Motor', 'Electro', 'Direct', 'Hydraulic',
        'Electronic'], dtype=object)
In [205]:
def preprocess_Steering_Type(text):
    text=str(text)
    if text=="nan":
        return np.nan
    text=text.title()
    if text=="Electrical":
        text=text.replace("Electrical","Electric")
    if text=="Electro":
        text=text.replace("Electro","Electric")
    if text=="Electronic":
        text=text.replace("Electronic","Electric")
    return text
In [206]:
main["Steering Type"]=main["Steering Type"].apply(preprocess_Steering_Type)
In [207]:
main["Steering Type"].unique()
Out[207]:
array(['Electric', 'Manual', nan, 'Power', 'Mt', 'Epas', 'Motor',
        'Direct', 'Hydraulic'], dtype=object)
In [208]:
main.columns[37]
Out[208]:
'Petrol Fuel Tank Capacity (Litres)'
In [209]:
main["Steering Column"]
Out[209]:
0      Tilt
1       NaN
        ... 
548     NaN
549     NaN
Name: Steering Column, Length: 160031, dtype: object
In [210]:
main["Steering Column"].unique()
Out[210]:
array(['Tilt', nan, 'Tilt & Telescopic', 'Collapsible',
        'Tilt and Telescopic', 'Tilt & Collapsible', 'Tilt Adjustable',
        'Tilt adjust', 'Tilt And Collapsible', 'Tilt Adjustuble',
        'Tilt Steering', 'Tilt adjustuble', 'Adjustable', 'Height & Reach',
        'Tilt & Reach Adjustment', 'Tilt&Telescope', 'Adjustable Steering',
        'Tiltable & Telescopic', 'Telescopic & Tilt', 'Tilt & telescopic',
        'Collapsible Steering', 'rick and pin', 'Power',
        'Tilt and Collapsible', 'Height & Reach Adjustment',
        'tilt and telescopic', 'Electrical Adjustable Steering',
        'Electrically Adjustable', 'Tilt Adjustable Steering',
        'Hydraulic Variable Power Assis', 'Tilt & Collapsible Steering',
        'Collapsible Steering Column', 'Low Tilt Steering',
        'Rack & Pinion', 'Electric Power Steering', 'Tilt & Adjustable'],
      dtype=object)
In [211]:
def preprocess_Steering_Column(text):
    text=str(text)
    if text=="nan":
        return np.nan
    #One Exception case
    if "Telescopic & Tilt" in text:
        return "Tilt And Telescopic"
    text=text.title()
    if "Steering" in text:
        text=text.replace("Steering","")
    if "Column" in text:
        text=text.replace("Column","")
    if "  " in text:
        text=text.replace("  "," ")
    if text[len(text)-1]==" ":
        text=text[:len(text)-1]
    if "Tilt" in text and (len(text)==4 or "Low" in text):
        text=text.replace("Tilt","Tiltable")
    else:
        text=text.replace("Tiltable","Tilt")
    if "&" in text:
        index=text.find("&")
        if text[index-1]!=" " and text[index+1]!=" ":
            text=text.replace("&"," And ")
        else:
            text=text.replace("&","And")
    if "Adjust" in text:
        ind=text.find('Adjust')
        try:
            text[ind+6]
        except:
            text=text.replace("Adjust","Adjustable")
    if "Adjustuble" in text:
        text=text.replace("Adjustuble","Adjustable")
    if "Tilt" in text and "Adjustable" in text and "And" not in text:
        tilt_index=text.find("Tilt")
        text=text[:tilt_index+4]+" And"+text[tilt_index+4:]
    if "Telescope" in text:
        text=text.replace("Telescope","Telescopic")
    if "Electrically" in text:
        text=text.replace("Electrically","Electrical")
    return text
In [212]:
main["Steering Column"]=main["Steering Column"].apply(preprocess_Steering_Column)
In [213]:
main["Steering Column"].unique()
Out[213]:
array(['Tiltable', nan, 'Tilt And Telescopic', 'Collapsible',
        'Tilt And Collapsible', 'Tilt And Adjustable', 'Adjustable',
        'Height And Reach', 'Tilt And Reach Adjustment', 'Rick And Pin',
        'Power', 'Height And Reach Adjustment', 'Electrical Adjustable',
        'Hydraulic Variable Power Assis', 'Low Tiltable',
        'Rack And Pinion', 'Electric Power'], dtype=object)
In [214]:
main.columns[38]
Out[214]:
'Emission Norm Compliance'
In [215]:
main["Front Brake Type"]
Out[215]:
0      Disc
1      Disc
        ... 
548     NaN
549     NaN
Name: Front Brake Type, Length: 160031, dtype: object
In [216]:
main["Front Brake Type"].unique()
Out[216]:
array(['Disc', 'Ventilated Disc', nan, 'Ventilated Discs',
        'Ventillated Discs', 'Ventillated Disc', 'ceramic Ventilated Disc',
        'Ceramic Disc', 'Vented Discs', 'ventilated disc', 'VeDi',
        'carbon ceramic brakes', 'Ventilated Disc with Twin Pot Caliper',
        'disc', 'Carbon Ceramic Brake', 'Carbon ceramic',
        'Twin piston sliding fist caliper', 'Ventilated discs',
        'Ventilated two piece steel brake discs', 'Ventilated steel discs',
        'Vantilated Disc', 'Discs', 'Aluminium monobloc Disc',
        'ventilated Disc'], dtype=object)
In [217]:
def preprocess_Front_Break_Type(text):
    text=str(text)
    if text=="nan":
        return np.nan
    text=text.title()
    if "Ventillated" in text:
        text=text.replace("Ventillated","Ventilated")
    if "Vantilated" in text:
        text=text.replace("Vantilated","Ventilated")
    if "Discs" in text:
        text=text.replace("Discs","Disc")
    if "Brakes" in text:
        text=text.replace("Brakes","")
    if "Brake" in text:
        text=text.replace("Brake","")
    if "  " in text:
        text=text.replace("  "," ")
    if text[len(text)-1]==" ":
        text=text[:len(text)-1]
    return text    
In [218]:
main["Front Brake Type"]=main["Front Brake Type"].apply(preprocess_Front_Break_Type)
In [219]:
main["Front Brake Type"].unique()
Out[219]:
array(['Disc', 'Ventilated Disc', nan, 'Ceramic Ventilated Disc',
        'Ceramic Disc', 'Vented Disc', 'Vedi', 'Carbon Ceramic',
        'Ventilated Disc With Twin Pot Caliper',
        'Twin Piston Sliding Fist Caliper',
        'Ventilated Two Piece Steel Disc', 'Ventilated Steel Disc',
        'Aluminium Monobloc Disc'], dtype=object)
In [220]:
main.columns[39]
Out[220]:
'Front Suspension'
In [221]:
main["Rear Brake Type"]
Out[221]:
0      Drum
1      Drum
        ... 
548     NaN
549     NaN
Name: Rear Brake Type, Length: 160031, dtype: object
In [222]:
main["Rear Brake Type"].unique()
Out[222]:
array(['Drum', 'Solid Disc', nan, 'Disc', 'Self adjusting Drums',
        'Self Adjusting Drum', 'Self Adjusting Drums',
        'Self-Adjusting Drum', 'Ventilated Disc',
        'ceramic Ventilated Disc', 'Ceramic Disc', 'Vented Discs',
        'ventilated disc', 'VeDi-S-ABS', 'carbon ceramic brakes', 'disc',
        'carbon ceramic brakes.', 'Carbon Ceramic Brake', 'Carbon ceramic',
        'Single piston sliding fist', 'Ventilated discs',
        'Ventilated Discs', 'drum',
        'Ventilated two piece steel brake discs', 'Ventilated steel discs',
        'Drum in Discs', 'Disc & Drum', 'Drum in disc', 'Ventilated Drum',
        'Ventilated Drum In Discs', 'Discs', 'Aluminium monobloc Disc',
        'Drum`'], dtype=object)
In [223]:
def preprocess_Rear_Brake_Type(text):
    list_elements_to_null_out=[".","Brakes","Brake","`"]
    text=str(text)
    if text=="nan":
        return np.nan
    #one exception case
    if "Disc & Drum" in text:
        return "Drum & Disc"
    text=text.title()
    if "In" in text:
        text=text.replace("In","&")
    if "Drums" in text:
        text=text.replace("Drums","Drum")
    if "Discs" in text:
        text=text.replace("Discs","Disc")
    if "-" in text:
        text=text.replace("-"," ")
    for i in list_elements_to_null_out:
        text=text.replace("{}".format(i),"")
    if "  " in text:
        text=text.replace("  "," ")
    if text[len(text)-1]==" ":
        text=text[:len(text)-1]
    return text
In [224]:
main["Rear Brake Type"]=main["Rear Brake Type"].apply(preprocess_Rear_Brake_Type)
In [225]:
main["Rear Brake Type"].unique()
Out[225]:
array(['Drum', 'Solid Disc', nan, 'Disc', 'Self Adjusting Drum',
        'Ventilated Disc', 'Ceramic Ventilated Disc', 'Ceramic Disc',
        'Vented Disc', 'Vedi S Abs', 'Carbon Ceramic',
        'Single Piston Sliding Fist', 'Ventilated Two Piece Steel Disc',
        'Ventilated Steel Disc', 'Drum & Disc', 'Ventilated Drum',
        'Ventilated Drum & Disc', 'Aluminium Monobloc Disc'], dtype=object)
In [226]:
main.columns[40]
Out[226]:
'Rear Suspension'
In [227]:
main["Length (mm)"]
Out[227]:
0      3995.0
1      3675.0
        ...  
548       NaN
549       NaN
Name: Length (mm), Length: 160031, dtype: float64
In [228]:
main.columns[41]
Out[228]:
'Steering Type'
In [229]:
main['Width (mm)']
Out[229]:
0      1735.0
1      1475.0
        ...  
548       NaN
549       NaN
Name: Width (mm), Length: 160031, dtype: float64
In [230]:
main.columns[42]
Out[230]:
'Steering Column'
In [231]:
main["Height (mm)"]
Out[231]:
0      1515.0
1      1825.0
        ...  
548       NaN
549       NaN
Name: Height (mm), Length: 160031, dtype: object
In [232]:
main["Height (mm)"].unique()
Out[232]:
array([1515.0, 1825.0, 1595.0, 1553.0, nan, 1485.0, 1530.0, 1475.0,
        1755.0, 1500.0, 1555.0, 1690.0, 1520.0, 1567.0, 1685.0, 1675.0,
        1837.0, 1427.0, 1647.0, 1525.0, 1703.0, 1418.0, 1433.0, 1686.0,
        1488.0, 1673.0, 1652.0, 1745.0, 1304.0, 1598.0, 1420.0, 1695.0,
        1469.0, 1448.0, 1497.0, 1667.0, 1676.0, 1212.0, 2075, 1318.0,
        1187.0, 1206.0, 1276.0, 1186.0, '1544', '1489', '1601',
        '1498-1501', '1495', 1635.0, 1560.0, 1617.0, 1665.0, 1505.0, 1860,
        1840, 1790, 1640.0, 1698.0, 1838.0, 1848.0, 1708.0, 1645.0, 1642.0,
        1550.0, 1180.0, 1165.0, 1888.0, 1724.0, 1857.0, 1627.0, 1845.0,
        1655.0, 1844.0, 1855.0, 1995.0, 1774.0, 1300.0, 1495.0, 1624.0,
        1400.0, 1446.0, 1510.0, 1644.0, 1720.0, 1411.0, 1447.0, 1588.0,
        1437.0, 1503.0, 1716.0, 1823.0, 1909.0, 1402.0, 1585.0, 1611.0,
        1880.0, 1901.0, 1518.0, 1760.0, 1867.0, 1650.0, 1649.0, 1710.0,
        1279.0, 1395.0, 1636.0, 1299.0, 1605, 1643, 1490, 1552.0, 1835.0,
        1612.0, 1706.0, 1606.0, 1535.0, 1615.0, 1537.0, 1786.0, 1616.0,
        1532.0, 1523.0, 1810.0, 1815.0, 1795.0, 1443.0, 1431.0],
      dtype=object)
In [233]:
def preprocess_Height(text):
    if type(text)==str:
        lists=text.split("-")
        if len(lists)==1:
            return float(text)
        else:
            first_=float(lists[0])
            second_=float(lists[1])
            avg_height=(first_+second_)/2    
            return avg_height
    return text
In [234]:
main["Height (mm)"]=main["Height (mm)"].apply(preprocess_Height)
In [235]:
main["Height (mm)"]
Out[235]:
0      1515.0
1      1825.0
        ...  
548       NaN
549       NaN
Name: Height (mm), Length: 160031, dtype: float64
In [236]:
main["Boot Space (Litres)"]
Out[236]:
0      378.0
1        NaN
        ...  
548      NaN
549      NaN
Name: Boot Space (Litres), Length: 160031, dtype: object
In [237]:
main["Boot Space (Litres)"].unique()
Out[237]:
array([378.0, nan, 375.0, 313.0, 510.0, 268.0, 260.0, 209.0, 318.0, 214.0,
        328.0, 341.0, '352', '284 ers', '3371,175', '400.0', '430', '359',
        '284', '430.0', '257.0', '400', '257', '359re', '430re', '346',
        '359 re', '2,055', 530.0, 335.0, 560.0, 465.0, 505.0, 460.0, 605.0,
        535.0, '358ers', '484', '358', '420', '390 re', '281.0', '480.0',
        '450', '326.0', '440', '470/1290', 37.0, 500, 200.0, 272.0, 210.0,
        74.0, 354.0, 506.0, 363.0, 420.0, 235.0, 311.0, 433.0, 392.0,
        540.0, 110.0, 616.0, 740.0, '520l', '454', '259', '259 l', '243.0',
        '384', '510.0', '190.0', '384.0', '190', '259.0', '580', '530ers',
        '173', 825.0, 520.0, 550.0, 285.0, 480.0, '587.0', '155 l', '211',
        '160', '211rs', '1,050', '420.0', '500.0', 336, 400, 315, '132',
        '446lts', '132 l', '598', '405lts', '275', '405', '446 lts', '645',
        '458', '150', '125', '625', '770', '772', '745', 405, 84, 279, 521,
        270, 385, 600, 625, 425.0, 350.0, 366.0, 345.0, 242.0, 73.0, 316.0,
        205.0, 419.0, 300.0, '385', '521', '494', '615s', '300', '495lts',
        '414'], dtype=object)
In [238]:
def preprocess_Boot_Space(text):
    text=str(text)
    if text=="nan":
        return np.nan
    lists=text.split("/")
    if len(lists)==1:
        text=text.replace(",","")
        text=text.replace("".join(re.findall('[a-zA-Z]',text)),"")
        if text[len(text)-1]==" ":
            text=text[:len(text)-1]
        text=float(text)
    else:
        f=float(lists[0])
        s=float(lists[1])
        text=(f+s)/2
    return text
In [239]:
preprocess_Boot_Space('480.0')
Out[239]:
480.0
In [240]:
main["Boot Space (Litres)"]=main["Boot Space (Litres)"].apply(preprocess_Boot_Space)
In [739]:
main["Boot Space (Litres)"]
Out[739]:
0         378.0
1           NaN
2         375.0
3         378.0
4           NaN
          ...  
160026    414.0
160027      NaN
160028      NaN
160029      NaN
160030      NaN
Name: Boot Space (Litres), Length: 160031, dtype: float64
In [740]:
main.columns[43]
Out[740]:
'Power Windows-Front'
In [741]:
main["Power Windows-Front"]
Out[741]:
0         YES
1         NaN
2         YES
3         YES
4          NO
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Power Windows-Front, Length: 160031, dtype: object
In [742]:
main["Power Windows-Front"].unique()
Out[742]:
array(['YES', nan, 'NO', '5', '4', '2', '1890', '2923', '506mm', '3200',
        '348', '2765'], dtype=object)
In [743]:
plt.figure(figsize=(15,5))
sns.countplot(main["Power Windows-Front"])
Out[743]:
<AxesSubplot:xlabel='Power Windows-Front', ylabel='count'>
In [744]:
main["Power Windows-Front"]=main["Power Windows-Front"].apply(lambda x:preprocess_to_null_out(x,False))
In [745]:
plt.figure(figsize=(15,5))
sns.countplot(main["Power Windows-Front"])
Out[745]:
<AxesSubplot:xlabel='Power Windows-Front', ylabel='count'>
In [746]:
main.columns[44]
Out[746]:
'Power Windows-Rear'
In [747]:
main["Power Windows-Rear"]
Out[747]:
0         YES
1         NaN
2         YES
3         YES
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Power Windows-Rear, Length: 160031, dtype: object
In [748]:
main["Power Windows-Rear"].unique()
Out[748]:
array(['YES', nan, 'NO', '5', '4', '1890', '2923', '2', '506mm', '348',
        '2765'], dtype=object)
In [749]:
plt.figure(figsize=(15,5))
sns.countplot(main["Power Windows-Rear"])
Out[749]:
<AxesSubplot:xlabel='Power Windows-Rear', ylabel='count'>
In [750]:
main["Power Windows-Rear"]=main["Power Windows-Rear"].apply(lambda x:preprocess_to_null_out(x,False))
In [751]:
plt.figure(figsize=(15,5))
sns.countplot(main["Power Windows-Rear"])
Out[751]:
<AxesSubplot:xlabel='Power Windows-Rear', ylabel='count'>
In [752]:
main.columns[45]
Out[752]:
'Heater'
In [753]:
main["Heater"]
Out[753]:
0         YES
1         YES
2         YES
3         YES
4         YES
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Heater, Length: 160031, dtype: object
In [754]:
main["Heater"].unique()
Out[754]:
array(['YES', nan, 'NO', '5', '4', '2', '2923', '506mm', '348'],
      dtype=object)
In [755]:
main["Heater"]=main["Heater"].apply(lambda x:preprocess_to_null_out(x,False))
In [756]:
plt.figure(figsize=(15,5))
sns.countplot(main["Heater"])
Out[756]:
<AxesSubplot:xlabel='Heater', ylabel='count'>
In [757]:
main.columns[46]
Out[757]:
'Adjustable Steering'
In [758]:
main["Adjustable Steering"]
Out[758]:
0         YES
1         NaN
2         YES
3         YES
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Adjustable Steering, Length: 160031, dtype: object
In [759]:
main["Adjustable Steering"].unique()
Out[759]:
array(['YES', nan, 'NO', '5', '4', '2', '2923', '348'], dtype=object)
In [760]:
main["Adjustable Steering"]=main["Adjustable Steering"].apply(lambda x:preprocess_to_null_out(x,False))
In [761]:
plt.figure(figsize=(15,5))
sns.countplot(main["Adjustable Steering"])
Out[761]:
<AxesSubplot:xlabel='Adjustable Steering', ylabel='count'>
In [762]:
main.columns[52]
Out[762]:
'Rear Seat Centre Arm Rest'
In [763]:
main["Rear Seat Centre Arm Rest"].unique()
Out[763]:
array(['YES', nan, 'NO', '5', '4', '2 Zone'], dtype=object)
In [764]:
plt.figure(figsize=(15,5))
sns.countplot(main["Rear Seat Centre Arm Rest"])
Out[764]:
<AxesSubplot:xlabel='Rear Seat Centre Arm Rest', ylabel='count'>
In [765]:
main["Rear Seat Centre Arm Rest"]=main["Rear Seat Centre Arm Rest"].apply(lambda x:preprocess_to_null_out(x,False))
In [766]:
plt.figure(figsize=(15,5))
sns.countplot(main["Rear Seat Centre Arm Rest"])
Out[766]:
<AxesSubplot:xlabel='Rear Seat Centre Arm Rest', ylabel='count'>
In [767]:
main.columns[53]
Out[767]:
'Height Adjustable Front Seat Belts'
In [768]:
main["Height Adjustable Front Seat Belts"].unique()
Out[768]:
array([nan, 'YES', 'NO', '1600.0', '1600', '5', '4'], dtype=object)
In [769]:
plt.figure(figsize=(15,5))
sns.countplot(main["Height Adjustable Front Seat Belts"])
Out[769]:
<AxesSubplot:xlabel='Height Adjustable Front Seat Belts', ylabel='count'>
In [770]:
main["Height Adjustable Front Seat Belts"]=main["Height Adjustable Front Seat Belts"].apply(lambda x:preprocess_to_null_out(x,False))
In [771]:
plt.figure(figsize=(15,5))
sns.countplot(main["Height Adjustable Front Seat Belts"])
Out[771]:
<AxesSubplot:xlabel='Height Adjustable Front Seat Belts', ylabel='count'>
In [772]:
main.columns[55]
Out[772]:
'Rear AC Vents'
In [773]:
main["Rear AC Vents"]
Out[773]:
0         YES
1         NaN
2         NaN
3         YES
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Rear AC Vents, Length: 160031, dtype: object
In [774]:
main["Rear AC Vents"].unique()
Out[774]:
array(['YES', nan, 'NO'], dtype=object)
In [775]:
plt.figure(figsize=(15,5))
sns.countplot(main["Rear AC Vents"])
Out[775]:
<AxesSubplot:xlabel='Rear AC Vents', ylabel='count'>
In [776]:
main.columns[56]
Out[776]:
'Seat Lumbar Support'
In [777]:
main["Seat Lumbar Support"]
Out[777]:
0         NaN
1         NaN
2         YES
3         NaN
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Seat Lumbar Support, Length: 160031, dtype: object
In [778]:
main["Seat Lumbar Support"].unique()
Out[778]:
array([nan, 'YES', 'NO'], dtype=object)
In [779]:
plt.figure(figsize=(15,5))
sns.countplot(main["Seat Lumbar Support"])
Out[779]:
<AxesSubplot:xlabel='Seat Lumbar Support', ylabel='count'>
In [780]:
main.columns[57]
Out[780]:
'Cruise Control'
In [781]:
main["Cruise Control"]
Out[781]:
0         YES
1         NaN
2         YES
3         YES
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Cruise Control, Length: 160031, dtype: object
In [782]:
main["Cruise Control"].unique()
Out[782]:
array(['YES', nan, 'NO'], dtype=object)
In [783]:
plt.figure(figsize=(15,5))
sns.countplot(main["Cruise Control"])
Out[783]:
<AxesSubplot:xlabel='Cruise Control', ylabel='count'>
In [784]:
main.columns[58]
Out[784]:
'Smart Access Card Entry'
In [785]:
main["Smart Access Card Entry"]
Out[785]:
0         YES
1         NaN
2         NaN
3         YES
4         NaN
          ... 
160026    NaN
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Smart Access Card Entry, Length: 160031, dtype: object
In [786]:
main["Smart Access Card Entry"].unique()
Out[786]:
array(['YES', nan, 'NO'], dtype=object)
In [787]:
plt.figure(figsize=(15,5))
sns.countplot(main["Smart Access Card Entry"])
Out[787]:
<AxesSubplot:xlabel='Smart Access Card Entry', ylabel='count'>
In [788]:
main.columns[59]
Out[788]:
'KeyLess Entry'
In [789]:
main["KeyLess Entry"].unique()
Out[789]:
array(['YES', nan, 'NO'], dtype=object)
In [790]:
plt.figure(figsize=(15,5))
sns.countplot(main["KeyLess Entry"])
Out[790]:
<AxesSubplot:xlabel='KeyLess Entry', ylabel='count'>
In [791]:
main.columns[60]
Out[791]:
'Engine Start/Stop Button'
In [792]:
main["Engine Start/Stop Button"]
Out[792]:
0         YES
1         NaN
2         YES
3         YES
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Engine Start/Stop Button, Length: 160031, dtype: object
In [793]:
main["Engine Start/Stop Button"].unique()
Out[793]:
array(['YES', nan, 'NO'], dtype=object)
In [794]:
plt.figure(figsize=(15,5))
sns.countplot(main["Engine Start/Stop Button"])
Out[794]:
<AxesSubplot:xlabel='Engine Start/Stop Button', ylabel='count'>
In [795]:
main.columns[61]
Out[795]:
'Glove Box Cooling'
In [796]:
main["Glove Box Cooling"]
Out[796]:
0         NaN
1         NaN
2         NaN
3         NaN
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Glove Box Cooling, Length: 160031, dtype: object
In [797]:
main["Glove Box Cooling"].unique()
Out[797]:
array([nan, 'YES', 'NO'], dtype=object)
In [798]:
plt.figure(figsize=(15,5))
sns.countplot(main["Glove Box Cooling"])
Out[798]:
<AxesSubplot:xlabel='Glove Box Cooling', ylabel='count'>
In [799]:
main.columns[62]
Out[799]:
'Voice Control'
In [800]:
main["Voice Control"]
Out[800]:
0         YES
1         NaN
2         YES
3         YES
4          NO
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Voice Control, Length: 160031, dtype: object
In [801]:
main["Voice Control"].unique()
Out[801]:
array(['YES', nan, 'NO'], dtype=object)
In [802]:
plt.figure(figsize=(15,5))
sns.countplot(main["Voice Control"])
Out[802]:
<AxesSubplot:xlabel='Voice Control', ylabel='count'>
In [803]:
main.columns[63]
Out[803]:
'Gear Shift Indicator'
In [804]:
main["Gear Shift Indicator"]
Out[804]:
0         YES
1         NaN
2         NaN
3          NO
4          NO
          ... 
160026    NaN
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Gear Shift Indicator, Length: 160031, dtype: object
In [805]:
main["Gear Shift Indicator"].unique()
Out[805]:
array(['YES', nan, 'NO', '7'], dtype=object)
In [806]:
plt.figure(figsize=(15,5))
sns.countplot(main["Gear Shift Indicator"])
Out[806]:
<AxesSubplot:xlabel='Gear Shift Indicator', ylabel='count'>
In [807]:
main["Gear Shift Indicator"]=main["Gear Shift Indicator"].apply(lambda x:preprocess_to_null_out(x,False))
In [808]:
plt.figure(figsize=(15,5))
sns.countplot(main["Gear Shift Indicator"])
Out[808]:
<AxesSubplot:xlabel='Gear Shift Indicator', ylabel='count'>
In [809]:
main.columns[64]
Out[809]:
'Tachometer'
In [810]:
main["Tachometer"]
Out[810]:
0         YES
1         NaN
2         NaN
3         YES
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Tachometer, Length: 160031, dtype: object
In [811]:
main["Tachometer"].unique()
Out[811]:
array(['YES', nan, 'NO', '2'], dtype=object)
In [812]:
plt.figure(figsize=(15,5))
sns.countplot(main["Tachometer"])
Out[812]:
<AxesSubplot:xlabel='Tachometer', ylabel='count'>
In [813]:
main["Tachometer"]=main["Tachometer"].apply(lambda x:preprocess_to_null_out(x,False))
In [814]:
plt.figure(figsize=(15,5))
sns.countplot(main["Tachometer"])
Out[814]:
<AxesSubplot:xlabel='Tachometer', ylabel='count'>
In [815]:
main.columns[65]
Out[815]:
'Electronic Multi-Tripmeter'
In [816]:
main["Electronic Multi-Tripmeter"]
Out[816]:
0         YES
1         YES
2         YES
3         YES
4         YES
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Electronic Multi-Tripmeter, Length: 160031, dtype: object
In [817]:
main["Electronic Multi-Tripmeter"].unique()
Out[817]:
array(['YES', nan, 'NO'], dtype=object)
In [818]:
plt.figure(figsize=(15,5))
sns.countplot(main["Electronic Multi-Tripmeter"])
Out[818]:
<AxesSubplot:xlabel='Electronic Multi-Tripmeter', ylabel='count'>
In [819]:
main.columns[[66]]
Out[819]:
Index(['Fabric Upholstery'], dtype='object')
In [820]:
main["Fabric Upholstery"]
Out[820]:
0         YES
1         YES
2         YES
3         YES
4         YES
          ... 
160026    NaN
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Fabric Upholstery, Length: 160031, dtype: object
In [821]:
main["Fabric Upholstery"].unique()
Out[821]:
array(['YES', nan, 'NO'], dtype=object)
In [822]:
plt.figure(figsize=(15,5))
sns.countplot(main["Fabric Upholstery"])
Out[822]:
<AxesSubplot:xlabel='Fabric Upholstery', ylabel='count'>
In [823]:
main.columns[67]
Out[823]:
'Leather Steering Wheel'
In [824]:
main["Leather Steering Wheel"]
Out[824]:
0         YES
1         NaN
2          NO
3         YES
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Leather Steering Wheel, Length: 160031, dtype: object
In [825]:
main["Leather Steering Wheel"].unique()
Out[825]:
array(['YES', nan, 'NO', '2'], dtype=object)
In [826]:
plt.figure(figsize=(15,5))
sns.countplot(main["Leather Steering Wheel"])
Out[826]:
<AxesSubplot:xlabel='Leather Steering Wheel', ylabel='count'>
In [827]:
main["Leather Steering Wheel"]=main["Leather Steering Wheel"].apply(lambda x:preprocess_to_null_out(x,False))
In [828]:
plt.figure(figsize=(15,5))
sns.countplot(main["Leather Steering Wheel"])
Out[828]:
<AxesSubplot:xlabel='Leather Steering Wheel', ylabel='count'>
In [829]:
main.columns[68]
Out[829]:
'Glove Compartment'
In [830]:
main["Glove Compartment"]
Out[830]:
0         YES
1         YES
2         YES
3         YES
4         YES
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Glove Compartment, Length: 160031, dtype: object
In [831]:
main["Glove Compartment"].unique()
Out[831]:
array(['YES', nan, 'NO', '2'], dtype=object)
In [832]:
plt.figure(figsize=(15,5))
sns.countplot(main["Glove Compartment"])
Out[832]:
<AxesSubplot:xlabel='Glove Compartment', ylabel='count'>
In [833]:
main["Glove Compartment"]=main["Glove Compartment"].apply(lambda x:preprocess_to_null_out(x,False))
In [834]:
plt.figure(figsize=(15,5))
sns.countplot(main["Glove Compartment"])
Out[834]:
<AxesSubplot:xlabel='Glove Compartment', ylabel='count'>
In [835]:
main.columns[69]
Out[835]:
'Digital Clock'
In [836]:
main["Digital Clock"]
Out[836]:
0         YES
1         NaN
2         NaN
3         YES
4         YES
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Digital Clock, Length: 160031, dtype: object
In [837]:
main["Digital Clock"].unique()
Out[837]:
array(['YES', nan, 'NO'], dtype=object)
In [838]:
plt.figure(figsize=(15,5))
sns.countplot(main["Digital Clock"])
Out[838]:
<AxesSubplot:xlabel='Digital Clock', ylabel='count'>
In [839]:
main.columns[70]
Out[839]:
'Digital Odometer'
In [840]:
main["Digital Odometer"]
Out[840]:
0         NaN
1         YES
2         NaN
3         NaN
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Digital Odometer, Length: 160031, dtype: object
In [841]:
main["Digital Odometer"].unique()
Out[841]:
array([nan, 'YES', 'NO'], dtype=object)
In [842]:
plt.figure(figsize=(15,5))
sns.countplot(main["Digital Odometer"])
Out[842]:
<AxesSubplot:xlabel='Digital Odometer', ylabel='count'>
In [843]:
main.columns[71]
Out[843]:
'Height Adjustable Driver Seat'
In [844]:
main["Height Adjustable Driver Seat"]
Out[844]:
0         YES
1         NaN
2         YES
3         YES
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Height Adjustable Driver Seat, Length: 160031, dtype: object
In [845]:
main["Height Adjustable Driver Seat"].unique()
Out[845]:
array(['YES', nan, 'NO'], dtype=object)
In [846]:
plt.figure(figsize=(15,5))
sns.countplot(main["Height Adjustable Driver Seat"])
Out[846]:
<AxesSubplot:xlabel='Height Adjustable Driver Seat', ylabel='count'>
In [847]:
main.columns[72]
Out[847]:
'Dual Tone Dashboard'
In [848]:
main["Dual Tone Dashboard"]
Out[848]:
0         YES
1         NaN
2         YES
3         YES
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Dual Tone Dashboard, Length: 160031, dtype: object
In [849]:
main["Dual Tone Dashboard"].unique()
Out[849]:
array(['YES', nan, 'NO'], dtype=object)
In [850]:
plt.figure(figsize=(15,5))
sns.countplot(main["Dual Tone Dashboard"])
Out[850]:
<AxesSubplot:xlabel='Dual Tone Dashboard', ylabel='count'>
In [851]:
main.columns[73]
Out[851]:
'Adjustable Headlights'
In [852]:
main["Adjustable Headlights"]
Out[852]:
0         YES
1         YES
2         YES
3         YES
4         YES
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Adjustable Headlights, Length: 160031, dtype: object
In [853]:
main["Adjustable Headlights"].unique()
Out[853]:
array(['YES', nan, 'NO'], dtype=object)
In [854]:
plt.figure(figsize=(15,5))
sns.countplot(main["Adjustable Headlights"])
Out[854]:
<AxesSubplot:xlabel='Adjustable Headlights', ylabel='count'>
In [855]:
main.columns[74]
Out[855]:
'Electric Folding Rear View Mirror'
In [856]:
main["Electric Folding Rear View Mirror"]
Out[856]:
0         YES
1         NaN
2         YES
3         YES
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Electric Folding Rear View Mirror, Length: 160031, dtype: object
In [857]:
main["Electric Folding Rear View Mirror"].unique()
Out[857]:
array(['YES', nan, 'NO'], dtype=object)
In [858]:
plt.figure(figsize=(15,5))
sns.countplot(main["Electric Folding Rear View Mirror"])
Out[858]:
<AxesSubplot:xlabel='Electric Folding Rear View Mirror', ylabel='count'>
In [859]:
main.columns[75]
Out[859]:
'Rear Window Wiper'
In [860]:
main["Rear Window Wiper"]
Out[860]:
0         NaN
1         NaN
2         YES
3         NaN
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Rear Window Wiper, Length: 160031, dtype: object
In [861]:
main["Rear Window Wiper"].unique()
Out[861]:
array([nan, 'YES', 'NO'], dtype=object)
In [862]:
plt.figure(figsize=(15,5))
sns.countplot(main["Rear Window Wiper"])
Out[862]:
<AxesSubplot:xlabel='Rear Window Wiper', ylabel='count'>
In [863]:
main.columns[76]
Out[863]:
'Rear Window Washer'
In [864]:
main["Rear Window Washer"]
Out[864]:
0         NaN
1         NaN
2         YES
3         NaN
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Rear Window Washer, Length: 160031, dtype: object
In [865]:
main["Rear Window Washer"].unique()
Out[865]:
array([nan, 'YES', 'NO'], dtype=object)
In [866]:
plt.figure(figsize=(15,5))
sns.countplot(main["Rear Window Washer"])
Out[866]:
<AxesSubplot:xlabel='Rear Window Washer', ylabel='count'>
In [867]:
main.columns[77]
Out[867]:
'Rear Window Defogger'
In [868]:
main["Rear Window Defogger"]
Out[868]:
0         YES
1         NaN
2         YES
3         YES
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Rear Window Defogger, Length: 160031, dtype: object
In [869]:
main["Rear Window Defogger"].unique()
Out[869]:
array(['YES', nan, 'NO'], dtype=object)
In [870]:
plt.figure(figsize=(15,5))
sns.countplot(main["Rear Window Defogger"])
Out[870]:
<AxesSubplot:xlabel='Rear Window Defogger', ylabel='count'>
In [871]:
main.columns[78]
Out[871]:
'Rear Spoiler'
In [872]:
main["Rear Spoiler"]
Out[872]:
0         NaN
1         NaN
2         NaN
3         NaN
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Rear Spoiler, Length: 160031, dtype: object
In [873]:
main["Rear Spoiler"].unique()
Out[873]:
array([nan, 'YES', 'NO'], dtype=object)
In [874]:
plt.figure(figsize=(15,5))
sns.countplot(main["Rear Spoiler"])
Out[874]:
<AxesSubplot:xlabel='Rear Spoiler', ylabel='count'>
In [875]:
main.columns[79]
Out[875]:
'Sun Roof'
In [876]:
main["Sun Roof"]
Out[876]:
0         NaN
1         NaN
2         NaN
3         NaN
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Sun Roof, Length: 160031, dtype: object
In [877]:
main["Sun Roof"].unique()
Out[877]:
array([nan, 'YES', 'NO'], dtype=object)
In [878]:
plt.figure(figsize=(15,5))
sns.countplot(main["Sun Roof"])
Out[878]:
<AxesSubplot:xlabel='Sun Roof', ylabel='count'>
In [879]:
main.columns[80]
Out[879]:
'Moon Roof'
In [880]:
main["Moon Roof"]
Out[880]:
0         NaN
1         NaN
2         NaN
3         NaN
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Moon Roof, Length: 160031, dtype: object
In [881]:
main["Moon Roof"].unique()
Out[881]:
array([nan, 'YES', 'NO'], dtype=object)
In [882]:
plt.figure(figsize=(15,5))
sns.countplot(main["Moon Roof"])
Out[882]:
<AxesSubplot:xlabel='Moon Roof', ylabel='count'>
In [883]:
main.columns[81]
Out[883]:
'Outside Rear View Mirror Turn Indicators'
In [884]:
main["Outside Rear View Mirror Turn Indicators"]
Out[884]:
0         YES
1         NaN
2         YES
3         YES
4         NaN
          ... 
160026    YES
160027    NaN
160028    NaN
160029    NaN
160030    NaN
Name: Outside Rear View Mirror Turn Indicators, Length: 160031, dtype: object
In [885]:
main["Outside Rear View Mirror Turn Indicators"].unique()
Out[885]:
array(['YES', nan, 'NO'], dtype=object)
In [886]:
plt.figure(figsize=(15,5))
sns.countplot(main["Outside Rear View Mirror Turn Indicators"])
Out[886]:
<AxesSubplot:xlabel='Outside Rear View Mirror Turn Indicators', ylabel='count'>
In [887]:
main.columns[82]
Out[887]:
'Intergrated Antenna'
In [888]:
main["Intergrated Antenna"]
Out[888]:
0         NaN
1         NaN
2         NaN
3         NaN
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Intergrated Antenna, Length: 160031, dtype: object
In [889]:
main["Intergrated Antenna"].unique()
Out[889]:
array([nan, 'YES', 'NO'], dtype=object)
In [890]:
plt.figure(figsize=(15,5))
sns.countplot(main["Intergrated Antenna"])
Out[890]:
<AxesSubplot:xlabel='Intergrated Antenna', ylabel='count'>
In [891]:
main.columns[83]
Out[891]:
'Chrome Grille'
In [892]:
main["Chrome Grille"]
Out[892]:
0         NaN
1         NaN
2         YES
3         NaN
4         NaN
          ... 
160026    NaN
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Chrome Grille, Length: 160031, dtype: object
In [893]:
main["Chrome Grille"].unique()
Out[893]:
array([nan, 'YES', 'NO'], dtype=object)
In [894]:
plt.figure(figsize=(15,5))
sns.countplot(main["Chrome Grille"])
Out[894]:
<AxesSubplot:xlabel='Chrome Grille', ylabel='count'>
In [895]:
main.columns[84]
Out[895]:
'Halogen Headlamps'
In [896]:
main["Halogen Headlamps"]
Out[896]:
0          NO
1         YES
2         YES
3          NO
4         YES
          ... 
160026    NaN
160027    NaN
160028    NaN
160029    NaN
160030    NaN
Name: Halogen Headlamps, Length: 160031, dtype: object
In [897]:
main["Halogen Headlamps"].unique()
Out[897]:
array(['NO', 'YES', nan], dtype=object)
In [898]:
plt.figure(figsize=(15,5))
sns.countplot(main["Halogen Headlamps"])
Out[898]:
<AxesSubplot:xlabel='Halogen Headlamps', ylabel='count'>
In [899]:
main.columns[85]
Out[899]:
'Roof Rail'
In [900]:
main["Roof Rail"]
Out[900]:
0         NaN
1         NaN
2         YES
3         NaN
4         NaN
          ... 
160026    NaN
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Roof Rail, Length: 160031, dtype: object
In [901]:
main["Roof Rail"].unique()
Out[901]:
array([nan, 'YES', 'NO'], dtype=object)
In [902]:
plt.figure(figsize=(15,5))
sns.countplot(main["Roof Rail"])
Out[902]:
<AxesSubplot:xlabel='Roof Rail', ylabel='count'>
In [903]:
main.columns[86]
Out[903]:
'LED DRLs'
In [904]:
main["LED DRLs"]
Out[904]:
0         YES
1         NaN
2          NO
3         YES
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: LED DRLs, Length: 160031, dtype: object
In [905]:
main["LED DRLs"].unique()
Out[905]:
array(['YES', nan, 'NO'], dtype=object)
In [906]:
plt.figure(figsize=(15,5))
sns.countplot(main["LED DRLs"])
Out[906]:
<AxesSubplot:xlabel='LED DRLs', ylabel='count'>
In [907]:
main.columns[87]
Out[907]:
'LED Taillights'
In [908]:
main["LED Taillights"]
Out[908]:
0         YES
1         NaN
2          NO
3         YES
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: LED Taillights, Length: 160031, dtype: object
In [909]:
main["LED Taillights"].unique()
Out[909]:
array(['YES', nan, 'NO'], dtype=object)
In [910]:
plt.figure(figsize=(15,5))
sns.countplot(main["LED Taillights"])
Out[910]:
<AxesSubplot:xlabel='LED Taillights', ylabel='count'>
In [911]:
main.columns[88]
Out[911]:
'Anti-Lock Braking System'
In [912]:
main["Anti-Lock Braking System"]
Out[912]:
0         YES
1         YES
2         YES
3         YES
4         YES
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Anti-Lock Braking System, Length: 160031, dtype: object
In [913]:
main["Anti-Lock Braking System"].unique()
Out[913]:
array(['YES', nan, 'NO', 'Speed Proportional Steering'], dtype=object)
In [914]:
plt.figure(figsize=(15,5))
sns.countplot(main["Anti-Lock Braking System"])
Out[914]:
<AxesSubplot:xlabel='Anti-Lock Braking System', ylabel='count'>
In [915]:
main["Anti-Lock Braking System"]=main["Anti-Lock Braking System"].apply(lambda x:preprocess_to_null_out(x,False))
In [916]:
plt.figure(figsize=(15,5))
sns.countplot(main["Anti-Lock Braking System"])
Out[916]:
<AxesSubplot:xlabel='Anti-Lock Braking System', ylabel='count'>
In [917]:
main.columns[89]
Out[917]:
'Central Locking'
In [918]:
main["Central Locking"]
Out[918]:
0         YES
1         NaN
2         YES
3         YES
4          NO
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Central Locking, Length: 160031, dtype: object
In [919]:
main["Central Locking"].unique()
Out[919]:
array(['YES', nan, 'NO'], dtype=object)
In [920]:
plt.figure(figsize=(15,5))
sns.countplot(main["Central Locking"])
Out[920]:
<AxesSubplot:xlabel='Central Locking', ylabel='count'>
In [921]:
main.columns[90]
Out[921]:
'Power Door Locks'
In [922]:
main["Power Door Locks"]
Out[922]:
0         YES
1         NaN
2         YES
3         YES
4          NO
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Power Door Locks, Length: 160031, dtype: object
In [923]:
main["Power Door Locks"].unique()
Out[923]:
array(['YES', nan, 'NO'], dtype=object)
In [924]:
plt.figure(figsize=(15,5))
sns.countplot(main["Power Door Locks"])
Out[924]:
<AxesSubplot:xlabel='Power Door Locks', ylabel='count'>
In [925]:
main.columns[91]
Out[925]:
'Child Safety Locks'
In [926]:
main["Child Safety Locks"]
Out[926]:
0         YES
1         YES
2         YES
3         YES
4         YES
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Child Safety Locks, Length: 160031, dtype: object
In [927]:
main["Child Safety Locks"].unique()
Out[927]:
array(['YES', nan, 'NO'], dtype=object)
In [928]:
plt.figure(figsize=(15,5))
sns.countplot(main["Child Safety Locks"])
Out[928]:
<AxesSubplot:xlabel='Child Safety Locks', ylabel='count'>
In [929]:
main.columns[92]
Out[929]:
'Side Airbag-Front'
In [930]:
main["Side Airbag-Front"]
Out[930]:
0         NaN
1         NaN
2         NaN
3         NaN
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Side Airbag-Front, Length: 160031, dtype: object
In [931]:
main["Side Airbag-Front"].unique()
Out[931]:
array([nan, 'YES', 'NO'], dtype=object)
In [932]:
plt.figure(figsize=(15,5))
sns.countplot(main["Side Airbag-Front"])
Out[932]:
<AxesSubplot:xlabel='Side Airbag-Front', ylabel='count'>
In [933]:
main.columns[93]
Out[933]:
'Day & Night Rear View Mirror'
In [934]:
main["Day & Night Rear View Mirror"]
Out[934]:
0         YES
1         NaN
2         YES
3         YES
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Day & Night Rear View Mirror, Length: 160031, dtype: object
In [935]:
main["Day & Night Rear View Mirror"].unique()
Out[935]:
array(['YES', nan, 'NO'], dtype=object)
In [936]:
plt.figure(figsize=(15,5))
sns.countplot(main["Day & Night Rear View Mirror"])
Out[936]:
<AxesSubplot:xlabel='Day & Night Rear View Mirror', ylabel='count'>
In [937]:
main.columns[94]
Out[937]:
'Passenger Side Rear View Mirror'
In [938]:
main["Passenger Side Rear View Mirror"]
Out[938]:
0         YES
1         YES
2         YES
3         YES
4         YES
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Passenger Side Rear View Mirror, Length: 160031, dtype: object
In [939]:
main["Passenger Side Rear View Mirror"].unique()
Out[939]:
array(['YES', nan, 'NO'], dtype=object)
In [940]:
plt.figure(figsize=(15,5))
sns.countplot(main["Passenger Side Rear View Mirror"])
Out[940]:
<AxesSubplot:xlabel='Passenger Side Rear View Mirror', ylabel='count'>
In [941]:
main.columns[95]
Out[941]:
'Rear Seat Belts'
In [942]:
main["Rear Seat Belts"]
Out[942]:
0         YES
1         YES
2         YES
3         YES
4         YES
          ... 
160026    NaN
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Rear Seat Belts, Length: 160031, dtype: object
In [943]:
main["Rear Seat Belts"].unique()
Out[943]:
array(['YES', nan, 'NO'], dtype=object)
In [944]:
plt.figure(figsize=(15,5))
sns.countplot(main["Rear Seat Belts"])
Out[944]:
<AxesSubplot:xlabel='Rear Seat Belts', ylabel='count'>
In [945]:
main.columns[96]
Out[945]:
'Seat Belt Warning'
In [946]:
main["Seat Belt Warning"]
Out[946]:
0         YES
1         YES
2         YES
3         YES
4         YES
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Seat Belt Warning, Length: 160031, dtype: object
In [947]:
main["Seat Belt Warning"].unique()
Out[947]:
array(['YES', nan, 'NO'], dtype=object)
In [948]:
plt.figure(figsize=(15,5))
sns.countplot(main["Seat Belt Warning"])
Out[948]:
<AxesSubplot:xlabel='Seat Belt Warning', ylabel='count'>
In [949]:
main.columns[97]
Out[949]:
'Door Ajar Warning'
In [950]:
main["Door Ajar Warning"]
Out[950]:
0         YES
1         NaN
2         NaN
3         YES
4          NO
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Door Ajar Warning, Length: 160031, dtype: object
In [951]:
main["Door Ajar Warning"].unique()
Out[951]:
array(['YES', nan, 'NO'], dtype=object)
In [952]:
plt.figure(figsize=(15,5))
sns.countplot(main["Door Ajar Warning"])
Out[952]:
<AxesSubplot:xlabel='Door Ajar Warning', ylabel='count'>
In [953]:
main.columns[98]
Out[953]:
'Adjustable Seats'
In [954]:
main["Adjustable Seats"]
Out[954]:
0         YES
1         YES
2         YES
3         YES
4         YES
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Adjustable Seats, Length: 160031, dtype: object
In [955]:
main["Adjustable Seats"].unique()
Out[955]:
array(['YES', nan, 'NO'], dtype=object)
In [956]:
plt.figure(figsize=(15,5))
sns.countplot(main["Adjustable Seats"])
Out[956]:
<AxesSubplot:xlabel='Adjustable Seats', ylabel='count'>
In [957]:
main.columns[99]
Out[957]:
'Engine Immobilizer'
In [958]:
main["Engine Immobilizer"]
Out[958]:
0         YES
1         NaN
2         YES
3         YES
4         NaN
          ... 
160026    NaN
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Engine Immobilizer, Length: 160031, dtype: object
In [959]:
main["Engine Immobilizer"].unique()
Out[959]:
array(['YES', nan, 'NO', 'Tubeless,Radial'], dtype=object)
In [960]:
plt.figure(figsize=(15,5))
sns.countplot(main["Engine Immobilizer"])
Out[960]:
<AxesSubplot:xlabel='Engine Immobilizer', ylabel='count'>
In [961]:
main["Anti-Lock Braking System"]=main["Anti-Lock Braking System"].apply(lambda x:preprocess_to_null_out(x,False))
In [962]:
plt.figure(figsize=(15,5))
sns.countplot(main["Anti-Lock Braking System"])
Out[962]:
<AxesSubplot:xlabel='Anti-Lock Braking System', ylabel='count'>
In [963]:
main["Engine Immobilizer"]=main["Engine Immobilizer"].apply(lambda x:preprocess_to_null_out(x,False))
In [964]:
plt.figure(figsize=(15,5))
sns.countplot(main["Engine Immobilizer"])
Out[964]:
<AxesSubplot:xlabel='Engine Immobilizer', ylabel='count'>
In [965]:
main.columns[100]
Out[965]:
'Crash Sensor'
In [966]:
main["Crash Sensor"]
Out[966]:
0         YES
1         YES
2         YES
3         YES
4         YES
          ... 
160026    NaN
160027    NaN
160028    NaN
160029    NaN
160030    NaN
Name: Crash Sensor, Length: 160031, dtype: object
In [967]:
main["Crash Sensor"].unique()
Out[967]:
array(['YES', nan, 'NO'], dtype=object)
In [968]:
plt.figure(figsize=(15,5))
sns.countplot(main["Crash Sensor"])
Out[968]:
<AxesSubplot:xlabel='Crash Sensor', ylabel='count'>
In [969]:
main.columns[101]
Out[969]:
'Engine Check Warning'
In [970]:
main["Engine Check Warning"]
Out[970]:
0         YES
1         NaN
2         YES
3         YES
4         YES
          ... 
160026    NaN
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Engine Check Warning, Length: 160031, dtype: object
In [971]:
main["Engine Check Warning"].unique()
Out[971]:
array(['YES', nan, 'NO'], dtype=object)
In [972]:
plt.figure(figsize=(15,5))
sns.countplot(main["Engine Check Warning"])
Out[972]:
<AxesSubplot:xlabel='Engine Check Warning', ylabel='count'>
In [973]:
main.columns[102]
Out[973]:
'Automatic Headlamps'
In [974]:
main["Automatic Headlamps"]
Out[974]:
0         YES
1         NaN
2          NO
3         YES
4         NaN
          ... 
160026    NaN
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Automatic Headlamps, Length: 160031, dtype: object
In [975]:
main["Automatic Headlamps"].unique()
Out[975]:
array(['YES', nan, 'NO'], dtype=object)
In [976]:
plt.figure(figsize=(15,5))
sns.countplot(main["Automatic Headlamps"])
Out[976]:
<AxesSubplot:xlabel='Automatic Headlamps', ylabel='count'>
In [977]:
main.columns[103]
Out[977]:
'EBD'
In [978]:
main["EBD"]
Out[978]:
0         YES
1         YES
2         YES
3         YES
4         YES
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: EBD, Length: 160031, dtype: object
In [979]:
main["EBD"].unique()
Out[979]:
array(['YES', nan, 'NO'], dtype=object)
In [980]:
plt.figure(figsize=(15,5))
sns.countplot(main["EBD"])
Out[980]:
<AxesSubplot:xlabel='EBD', ylabel='count'>
In [981]:
main.columns[104]
Out[981]:
'Electronic Stability Control'
In [982]:
main["Electronic Stability Control"]
Out[982]:
0          NO
1         NaN
2         NaN
3         YES
4          NO
          ... 
160026    NaN
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Electronic Stability Control, Length: 160031, dtype: object
In [983]:
main["Electronic Stability Control"].unique()
Out[983]:
array(['NO', nan, 'YES'], dtype=object)
In [984]:
plt.figure(figsize=(15,5))
sns.countplot(main["Electronic Stability Control"])
Out[984]:
<AxesSubplot:xlabel='Electronic Stability Control', ylabel='count'>
In [985]:
main.columns[105]
Out[985]:
'Follow Me Home Headlamps'
In [986]:
main["Follow Me Home Headlamps"]
Out[986]:
0         YES
1         NaN
2         NaN
3         YES
4         NaN
          ... 
160026    NaN
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Follow Me Home Headlamps, Length: 160031, dtype: object
In [987]:
main["Follow Me Home Headlamps"].unique()
Out[987]:
array(['YES', nan, 'NO'], dtype=object)
In [988]:
plt.figure(figsize=(15,5))
sns.countplot(main["Follow Me Home Headlamps"])
Out[988]:
<AxesSubplot:xlabel='Follow Me Home Headlamps', ylabel='count'>
In [989]:
main.columns[106]
Out[989]:
'Rear Camera'
In [990]:
main["Rear Camera"]
Out[990]:
0         YES
1         NaN
2         YES
3         YES
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Rear Camera, Length: 160031, dtype: object
In [991]:
main["Rear Camera"].unique()
Out[991]:
array(['YES', nan, 'NO'], dtype=object)
In [992]:
plt.figure(figsize=(15,5))
sns.countplot(main["Rear Camera"])
Out[992]:
<AxesSubplot:xlabel='Rear Camera', ylabel='count'>
In [993]:
main.columns[107]
Out[993]:
'ISOFIX Child Seat Mounts'
In [994]:
main["ISOFIX Child Seat Mounts"]
Out[994]:
0         YES
1         NaN
2         YES
3         YES
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: ISOFIX Child Seat Mounts, Length: 160031, dtype: object
In [995]:
main["ISOFIX Child Seat Mounts"].unique()
Out[995]:
array(['YES', nan, 'NO'], dtype=object)
In [996]:
plt.figure(figsize=(15,5))
sns.countplot(main["ISOFIX Child Seat Mounts"])
Out[996]:
<AxesSubplot:xlabel='ISOFIX Child Seat Mounts', ylabel='count'>
In [997]:
main.columns[109]
Out[997]:
'Hill Assist'
In [998]:
main["Hill Assist"]
Out[998]:
0          NO
1         NaN
2          NO
3         YES
4          NO
          ... 
160026    NaN
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Hill Assist, Length: 160031, dtype: object
In [999]:
main["Hill Assist"].unique()
Out[999]:
array(['NO', nan, 'YES'], dtype=object)
In [1000]:
plt.figure(figsize=(15,5))
sns.countplot(main["Hill Assist"])
Out[1000]:
<AxesSubplot:xlabel='Hill Assist', ylabel='count'>
In [1001]:
main.columns[110]
Out[1001]:
'Radio'
In [1002]:
main["Radio"]
Out[1002]:
0         YES
1         NaN
2         YES
3         YES
4          NO
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Radio, Length: 160031, dtype: object
In [1003]:
main["Radio"].unique()
Out[1003]:
array(['YES', nan, 'NO'], dtype=object)
In [1004]:
plt.figure(figsize=(15,5))
sns.countplot(main["Radio"])
Out[1004]:
<AxesSubplot:xlabel='Radio', ylabel='count'>
In [1005]:
main.columns[111]
Out[1005]:
'Audio System Remote Control'
In [1006]:
main["Audio System Remote Control"]
Out[1006]:
0         YES
1         NaN
2         YES
3         YES
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Audio System Remote Control, Length: 160031, dtype: object
In [1007]:
main["Audio System Remote Control"].unique()
Out[1007]:
array(['YES', nan, 'NO'], dtype=object)
In [1008]:
plt.figure(figsize=(15,5))
sns.countplot(main["Audio System Remote Control"])
Out[1008]:
<AxesSubplot:xlabel='Audio System Remote Control', ylabel='count'>
In [1009]:
main.columns[112]
Out[1009]:
'Speakers Front'
In [1010]:
main["Speakers Front"]
Out[1010]:
0         YES
1         NaN
2         YES
3         YES
4          NO
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Speakers Front, Length: 160031, dtype: object
In [1011]:
main["Speakers Front"].unique()
Out[1011]:
array(['YES', nan, 'NO'], dtype=object)
In [1012]:
plt.figure(figsize=(15,5))
sns.countplot(main["Speakers Front"])
Out[1012]:
<AxesSubplot:xlabel='Speakers Front', ylabel='count'>
In [1013]:
main.columns[113]
Out[1013]:
'Speakers Rear'
In [1014]:
main["Speakers Rear"]
Out[1014]:
0         YES
1         NaN
2         YES
3         YES
4          NO
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Speakers Rear, Length: 160031, dtype: object
In [1015]:
main["Speakers Rear"].unique()
Out[1015]:
array(['YES', nan, 'NO'], dtype=object)
In [1016]:
plt.figure(figsize=(15,5))
sns.countplot(main["Speakers Rear"])
Out[1016]:
<AxesSubplot:xlabel='Speakers Rear', ylabel='count'>
In [1017]:
main.columns[114]
Out[1017]:
'Integrated 2DIN Audio'
In [1018]:
main["Integrated 2DIN Audio"]
Out[1018]:
0         YES
1         NaN
2         YES
3         YES
4          NO
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Integrated 2DIN Audio, Length: 160031, dtype: object
In [1019]:
main["Integrated 2DIN Audio"].unique()
Out[1019]:
array(['YES', nan, 'NO'], dtype=object)
In [1020]:
plt.figure(figsize=(15,5))
sns.countplot(main["Integrated 2DIN Audio"])
Out[1020]:
<AxesSubplot:xlabel='Integrated 2DIN Audio', ylabel='count'>
In [1021]:
main.columns[115]
Out[1021]:
'USB & Auxiliary input'
In [1022]:
main["USB & Auxiliary input"]
Out[1022]:
0         YES
1         NaN
2         YES
3         YES
4          NO
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: USB & Auxiliary input, Length: 160031, dtype: object
In [1023]:
main["USB & Auxiliary input"].unique()
Out[1023]:
array(['YES', nan, 'NO'], dtype=object)
In [1024]:
plt.figure(figsize=(15,5))
sns.countplot(main["USB & Auxiliary input"])
Out[1024]:
<AxesSubplot:xlabel='USB & Auxiliary input', ylabel='count'>
In [1025]:
main.columns[116]
Out[1025]:
'Bluetooth Connectivity'
In [1026]:
main["Bluetooth Connectivity"]
Out[1026]:
0         YES
1         NaN
2         YES
3         YES
4          NO
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Bluetooth Connectivity, Length: 160031, dtype: object
In [1027]:
main["Bluetooth Connectivity"].unique()
Out[1027]:
array(['YES', nan, 'NO'], dtype=object)
In [1028]:
plt.figure(figsize=(15,5))
sns.countplot(main["Bluetooth Connectivity"])
Out[1028]:
<AxesSubplot:xlabel='Bluetooth Connectivity', ylabel='count'>
In [1029]:
main.columns[117]
Out[1029]:
'Android Auto'
In [1030]:
main["Android Auto"]
Out[1030]:
0         YES
1         NaN
2         YES
3         YES
4          NO
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Android Auto, Length: 160031, dtype: object
In [1031]:
main["Android Auto"].unique()
Out[1031]:
array(['YES', nan, 'NO'], dtype=object)
In [1032]:
plt.figure(figsize=(15,5))
sns.countplot(main["Android Auto"])
Out[1032]:
<AxesSubplot:xlabel='Android Auto', ylabel='count'>
In [1033]:
main.columns[118]
Out[1033]:
'Apple CarPlay'
In [1034]:
main["Apple CarPlay"]
Out[1034]:
0         YES
1         NaN
2         YES
3         YES
4          NO
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Apple CarPlay, Length: 160031, dtype: object
In [1035]:
main["Apple CarPlay"].unique()
Out[1035]:
array(['YES', nan, 'NO'], dtype=object)
In [1036]:
plt.figure(figsize=(15,5))
sns.countplot(main["Apple CarPlay"])
Out[1036]:
<AxesSubplot:xlabel='Apple CarPlay', ylabel='count'>
In [1037]:
main.columns[119]
Out[1037]:
'Place'
In [1038]:
main["Place"]
Out[1038]:
0                              On-Road Price in Amritsar : 
1                               On-Road Price in Kolkata : 
2                                 On-Road Price in Surat : 
3                                 On-Road Price in Thane : 
4                           On-Road Price in Navi Mumbai : 
                                ...                        
160026    On-Road Price in Gurgaon : (Not Available in F...
160027    On-Road Price in New Delhi : (Not Available in...
160028    On-Road Price in Kolkata : (Not Available in D...
160029                        On-Road Price in New Delhi : 
160030    On-Road Price in New Delhi : (Not Available in...
Name: Place, Length: 160031, dtype: object
In [1039]:
main["Place"].unique()
Out[1039]:
array(['On-Road Price in Amritsar : ', 'On-Road Price in Kolkata : ',
        'On-Road Price in Surat : ', 'On-Road Price in Thane : ',
        'On-Road Price in Navi Mumbai : ', 'On-Road Price in Jaipur : ',
        'On-Road Price in Ludhiana : ', 'On-Road Price in Madurai :',
        'On-Road Price in Jabalpur : ', 'On-Road Price in Chennai : ',
        'On-Road Price in Ahmedabad : ', 'On-Road Price in Hyderabad :',
        'On-Road Price in Varanasi : ', 'On-Road Price in Bangalore :',
        'On-Road Price in Panvel : (Not Available in Navi Mumbai)',
        'On-Road Price in Meerut : ', 'On-Road Price in Srinagar :',
        'On-Road Price in Chandigarh :',
        'On-Road Price in New Delhi : (Not Available in Jodhpur)',
        'On-Road Price in Vijayawada :', 'On-Road Price in Gwalior : ',
        'On-Road Price in Kanpur : ', 'On-Road Price in Kota : ',
        'On-Road Price in Ghaziabad :', 'On-Road Price in Howrah : ',
        'On-Road Price in Mysore : ', 'On-Road Price in Srinagar : ',
        'On-Road Price in Raipur : ', 'On-Road Price in Dhanbad : ',
        'On-Road Price in Patna : ', 'On-Road Price in Mysore :',
        'On-Road Price in Hyderabad : ', 'On-Road Price in Jodhpur : ',
        'On-Road Price in Solapur : ',
        'On-Road Price in Pune : (Not Available in Pimpri chinchwad)',
        'On-Road Price in Gwalior :', 'On-Road Price in Coimbatore : ',
        'On-Road Price in New Delhi : (Not Available in Navi Mumbai)',
        'On-Road Price in Bhopal : ', 'On-Road Price in Guwahati : ',
        'On-Road Price in New Delhi : (Not Available in Solapur)',
        'On-Road Price in Faridabad : ', 'On-Road Price in Jaipur :',
        'On-Road Price in Faridabad :', 'On-Road Price in Nagpur : ',
        'On-Road Price in Pune :(Not Available in Pimpri chinchwad)',
        'On-Road Price in Indore : ', 'On-Road Price in Mumbai : ',
        'On-Road Price in Raipur :', 'On-Road Price in Vijayawada : ',
        'On-Road Price in Chandigarh : ', 'On-Road Price in Aurangabad : ',
        'On-Road Price in Agra : ', 'On-Road Price in Lucknow : ',
        'On-Road Price in Surat :', 'On-Road Price in New Delhi : ',
        'On-Road Price in New Delhi : (Not Available in Rajkot)',
        'On-Road Price in Madurai : ', 'On-Road Price in Pune : ',
        'On-Road Price in Ghaziabad : ',
        'On-Road Price in Visakhapatnam : ',
        'On-Road Price in Bangalore : ', 'On-Road Price in Vadodara : ',
        'On-Road Price in Allahabad : ',
        'On-Road Price in New Delhi : (Not Available in Surat)',
        'On-Road Price in Navi Mumbai :', 'On-Road Price in Nashik : ',
        'On-Road Price in Amritsar :', 'On-Road Price in Nashik :',
        'On-Road Price in Aurangabad :', 'On-Road Price in Howrah :',
        'On-Road Price in Agra :', 'On-Road Price in Pune :',
        'On-Road Price in Rajkot : ', 'On-Road Price in Ranchi :',
        'On-Road Price in Ranchi : ', 'On-Road Price in Nagpur :',
        'On-Road Price in Kota :', 'On-Road Price in Solapur :',
        'On-Road Price in New Delhi :',
        'On-Road Price in Kharghar : (Not Available in Navi Mumbai)',
        'On-Road Price in Jabalpur :', 'On-Road Price in Patna :',
        'On-Road Price in Coimbatore :', 'On-Road Price in Chennai :',
        'On-Road Price in New Delhi : (Not Available in Vadodara)',
        'On-Road Price in Vadodara :',
        'On-Road Price in New Delhi : (Not Available in Aurangabad)',
        'On-Road Price in New Delhi : (Not Available in Bhopal)',
        'On-Road Price in Dhanbad :', 'On-Road Price in Bhopal :',
        'On-Road Price in New Delhi : (Not Available in Chennai)',
        'On-Road Price in Allahabad :', 'On-Road Price in Lucknow :',
        'On-Road Price in New Delhi : (Not Available in Ranchi)',
        'On-Road Price in New Delhi : (Not Available in Srinagar)',
        'On-Road Price in Guwahati :', 'On-Road Price in Ahmedabad :',
        'On-Road Price in New Delhi : (Not Available in Gwalior)',
        'On-Road Price in Rajkot :', 'On-Road Price in Kanpur :',
        'On-Road Price in New Delhi : (Not Available in Pune)',
        'On-Road Price in New Delhi : (Not Available in Meerut)',
        'On-Road Price in New Delhi : (Not Available in Howrah)',
        'On-Road Price in Mumbai :', 'On-Road Price in Meerut :',
        'On-Road Price in New Delhi : (Not Available in Lucknow)',
        'On-Road Price in Varanasi :',
        'On-Road Price in New Delhi : (Not Available in Varanasi)',
        'On-Road Price in New Delhi : (Not Available in Kota)',
        'On-Road Price in New Delhi : (Not Available in Ludhiana)',
        'On-Road Price in Ludhiana :',
        'On-Road Price in New Delhi : (Not Available in Raipur)',
        'On-Road Price in Indore :',
        'On-Road Price in New Delhi : (Not Available in Bangalore)',
        'On-Road Price in New Delhi : (Not Available in Patna)',
        'On-Road Price in New Delhi : (Not Available in Dhanbad)',
        'On-Road Price in New Delhi : (Not Available in Mysore)',
        'On-Road Price in Kolkata : (Not Available in Howrah)',
        'On-Road Price in Visakhapatnam :',
        'On-Road Price in New Delhi : (Not Available in Mumbai)',
        'On-Road Price in New Delhi : (Not Available in Ahmedabad)',
        'On-Road Price in New Delhi : (Not Available in Visakhapatnam)',
        'On-Road Price in New Delhi : (Not Available in Allahabad)',
        'On-Road Price in Thane :',
        'On-Road Price in New Delhi : (Not Available in Vijayawada)',
        'On-Road Price in New Delhi : (Not Available in Faridabad)',
        'On-Road Price in New Delhi : (Not Available in Thane)',
        'On-Road Price in New Delhi : (Not Available in Nagpur)',
        'On-Road Price in New Delhi : (Not Available in Amritsar)',
        'On-Road Price in New Delhi : (Not Available in Ghaziabad)',
        'On-Road Price in New Delhi : (Not Available in Coimbatore)',
        'On-Road Price in Jodhpur :',
        'On-Road Price in New Delhi : (Not Available in Indore)',
        'On-Road Price in Kolkata :',
        'On-Road Price in New Delhi : (Not Available in Pimpri chinchwad)',
        'On-Road Price in New Delhi : (Not Available in Hyderabad)',
        'On-Road Price in New Delhi : (Not Available in Nashik)',
        'On-Road Price in New Delhi : (Not Available in Kanpur)',
        'On-Road Price in New Delhi : (Not Available in Madurai)',
        'On-Road Price in New Delhi : (Not Available in Jaipur)',
        'On-Road Price in New Delhi : (Not Available in Guwahati)',
        'On-Road Price in New Delhi : (Not Available in Jabalpur)',
        'On-Road Price in New Delhi : (Not Available in Kolkata)',
        'On-Road Price in New Delhi : (Not Available in Agra)',
        'On-Road Price in New Delhi : (Not Available in Chandigarh)',
        'On-Road Price in Kapurthala : (Not Available in Amritsar)',
        'On-Road Price in Pimpri chinchwad : ',
        'On-Road Price in Kolhapur : (Not Available in Solapur)',
        'On-Road Price in Mumbai : (Not Available in Navi Mumbai)',
        'On-Road Price in Allahabad : (Not Available in Varanasi)',
        'On-Road Price in Bhilwara : ',
        'On-Road Price in Gurgaon : (Not Available in Faridabad)',
        'On-Road Price in Bhopal : (Not Available in Gwalior)',
        'On-Road Price in Kolkata : (Not Available in Patna)',
        'On-Road Price in Udaipur : ',
        'On-Road Price in Jamshedpur : (Not Available in Dhanbad)',
        'On-Road Price in Jammu : (Not Available in Srinagar)',
        'On-Road Price in Tiruchirappalli : ',
        'On-Road Price in Bhopal : (Not Available in Jabalpur)',
        'On-Road Price in Pune : (Not Available in Solapur)',
        'On-Road Price in Guntur : ',
        'On-Road Price in Agra : (Not Available in Ghaziabad)',
        'On-Road Price in Jamshedpur : ',
        'On-Road Price in Bareilly : (Not Available in Meerut)',
        'On-Road Price in Bangalore : (Not Available in Mysore)',
        'On-Road Price in Ahmedabad : (Not Available in Vadodara)',
        'On-Road Price in Udaipur : (Not Available in Jodhpur)',
        'On-Road Price in Lucknow : (Not Available in Kanpur)',
        'On-Road Price in Ranchi : (Not Available in Dhanbad)',
        'On-Road Price in Lucknow : (Not Available in Allahabad)',
        'On-Road Price in Lucknow : (Not Available in Agra)',
        'On-Road Price in Visakhapatnam : (Not Available in Vijayawada)',
        'On-Road Price in Lucknow : (Not Available in Ghaziabad)',
        'On-Road Price in Lucknow : (Not Available in Varanasi)',
        'On-Road Price in Jaipur : (Not Available in Kota)',
        'On-Road Price in Ludhiana : (Not Available in Amritsar)',
        'On-Road Price in Nashik : (Not Available in Aurangabad)',
        'On-Road Price in Lucknow : (Not Available in Meerut)',
        'On-Road Price in Kolkata : (Not Available in Guwahati)',
        'On-Road Price in Noida : (Not Available in Ghaziabad)',
        'On-Road Price in Indore : (Not Available in Jabalpur)',
        'On-Road Price in Navi Mumbai : (Not Available in Nashik)',
        'On-Road Price in Mumbai : (Not Available in Thane)',
        'On-Road Price in Mangalagiri : (Not Available in Vijayawada)',
        'On-Road Price in Indore : (Not Available in Bhopal)',
        'On-Road Price in Indore : (Not Available in Gwalior)',
        'On-Road Price in Mangalagiri : (Not Available in Visakhapatnam)',
        'On-Road Price in Noida : (Not Available in Agra)',
        'On-Road Price in Noida : (Not Available in Meerut)',
        'On-Road Price in Mumbai : (Not Available in Aurangabad)',
        'On-Road Price in Mumbai : (Not Available in Surat)',
        'On-Road Price in Chennai : (Not Available in Visakhapatnam)',
        'On-Road Price in Chennai : (Not Available in Mysore)',
        'On-Road Price in Mumbai : (Not Available in Ahmedabad)',
        'On-Road Price in Mumbai : (Not Available in Nashik)',
        'On-Road Price in Mumbai : (Not Available in Pune)',
        'On-Road Price in Mumbai : (Not Available in Indore)',
        'On-Road Price in Mumbai : (Not Available in Nagpur)',
        'On-Road Price in Chennai : (Not Available in Raipur)',
        'On-Road Price in Chennai : (Not Available in Bangalore)',
        'On-Road Price in Mumbai : (Not Available in Pimpri chinchwad)',
        'On-Road Price in Chennai : (Not Available in Vijayawada)',
        'On-Road Price in Mumbai : (Not Available in Vadodara)',
        'On-Road Price in Mumbai : (Not Available in Solapur)',
        'On-Road Price in Chennai : (Not Available in Coimbatore)',
        'On-Road Price in Chennai : (Not Available in Hyderabad)',
        'On-Road Price in Mumbai : (Not Available in Rajkot)',
        'On-Road Price in Chennai : (Not Available in Madurai)',
        'On-Road Price in Kolkata : (Not Available in Dhanbad)',
        'On-Road Price in Barshi : (Not Available in Solapur)',
        'On-Road Price in Chandrapur : (Not Available in Nagpur)',
        'On-Road Price in Chhindwara : (Not Available in Bhopal)',
        'On-Road Price in Vadodara : (Not Available in Surat)',
        'On-Road Price in Kolkata : (Not Available in Ranchi)',
        'On-Road Price in Chhindwara : (Not Available in Jabalpur)',
        'On-Road Price in Chhindwara : (Not Available in Gwalior)',
        'On-Road Price in Ahmedabad : (Not Available in Rajkot)',
        'On-Road Price in Vijayawada : (Not Available in Visakhapatnam)',
        'On-Road Price in Kolkata : (Not Available in Raipur)',
        'On-Road Price in Chhindwara : (Not Available in Indore)',
        'On-Road Price in Agra : (Not Available in Meerut)',
        'On-Road Price in Mumbai : (Not Available in Mysore)',
        'On-Road Price in Mumbai : (Not Available in Bangalore)',
        'On-Road Price in Mumbai : (Not Available in Coimbatore)',
        'On-Road Price in Mumbai : (Not Available in Vijayawada)',
        'On-Road Price in Mumbai : (Not Available in Hyderabad)',
        'On-Road Price in Mumbai : (Not Available in Madurai)',
        'On-Road Price in Mumbai : (Not Available in Chennai)',
        'On-Road Price in Mumbai : (Not Available in Visakhapatnam)',
        'On-Road Price in Guntur : (Not Available in Vijayawada)',
        'On-Road Price in Rajahmundry : ',
        'On-Road Price in Pune : (Not Available in Aurangabad)',
        'On-Road Price in Sant Kabir Nagar : (Not Available in Varanasi)',
        'On-Road Price in North 24 Parganas : (Not Available in Dhanbad)',
        'On-Road Price in North 24 Parganas : (Not Available in Ranchi)',
        'On-Road Price in Jalandhar : (Not Available in Amritsar)',
        'On-Road Price in North 24 Parganas : (Not Available in Howrah)',
        'On-Road Price in North 24 Parganas : (Not Available in Patna)',
        'On-Road Price in North 24 Parganas : ',
        'On-Road Price in Ajmer : (Not Available in Jodhpur)',
        'On-Road Price in Jaipur : (Not Available in Jodhpur)',
        'On-Road Price in Satara : (Not Available in Solapur)',
        'On-Road Price in Guna : (Not Available in Gwalior)',
        'On-Road Price in Aurangabad : (Not Available in Nagpur)',
        'On-Road Price in Coimbatore : (Not Available in Madurai)',
        'On-Road Price in Guntur : (Not Available in Visakhapatnam)',
        'On-Road Price in Jamshedpur : (Not Available in Ranchi)',
        'On-Road Price in Ghaziabad : (Not Available in Meerut)',
        'On-Road Price in Krishna : ',
        'On-Road Price in Krishna : (Not Available in Vijayawada)',
        'On-Road Price in Anantnag : (Not Available in Srinagar)',
        'On-Road Price in Ahmedabad : (Not Available in Surat)',
        'On-Road Price in Pune : (Not Available in Nagpur)',
        'On-Road Price in Gurgaon : ',
        'On-Road Price in Varanasi : (Not Available in Allahabad)',
        'On-Road Price in Jalandhar : ',
        'On-Road Price in Krishna : (Not Available in Visakhapatnam)',
        'On-Road Price in Rohtak : (Not Available in Faridabad)',
        'On-Road Price in Kolkata : (Not Available in Allahabad)',
        'On-Road Price in Rohtak : ',
        'On-Road Price in Thane : (Not Available in Nashik)',
        'On-Road Price in Thane : (Not Available in Aurangabad)',
        'On-Road Price in Muzaffarpur : ',
        'On-Road Price in Panvel : (Not Available in Pune)',
        'On-Road Price in Panvel : (Not Available in Pimpri chinchwad)',
        'On-Road Price in Etawah : (Not Available in Agra)',
        'On-Road Price in Nagaon : ',
        'On-Road Price in Thane : (Not Available in Solapur)',
        'On-Road Price in Hazaribagh : ',
        'On-Road Price in Thane : (Not Available in Pimpri chinchwad)',
        'On-Road Price in Panvel : ',
        'On-Road Price in Thane : (Not Available in Nagpur)',
        'On-Road Price in Jammu : ',
        'On-Road Price in Kolkata : (Not Available in Visakhapatnam)',
        'On-Road Price in Kolkata : (Not Available in Varanasi)',
        'On-Road Price in Sangli : (Not Available in Solapur)',
        'On-Road Price in Anand : (Not Available in Ahmedabad)',
        'On-Road Price in Ballabhgarh : (Not Available in Faridabad)'],
      dtype=object)
In [1040]:
def preprocess_Place(text):
    text=text.split(":")[0]
    text=text.replace("On-Road Price in ","")
    if text[len(text)-1]==" ":
        text=text[:len(text)-1]
    text=text.title()
    return text
In [1041]:
preprocess_Place("On-Road Price in Thane : (Not Available in Solapur)")
Out[1041]:
'Thane'
In [1042]:
main["Place"]=main["Place"].apply(preprocess_Place)
In [1043]:
main["Place"].unique()
Out[1043]:
array(['Amritsar', 'Kolkata', 'Surat', 'Thane', 'Navi Mumbai', 'Jaipur',
        'Ludhiana', 'Madurai', 'Jabalpur', 'Chennai', 'Ahmedabad',
        'Hyderabad', 'Varanasi', 'Bangalore', 'Panvel', 'Meerut',
        'Srinagar', 'Chandigarh', 'New Delhi', 'Vijayawada', 'Gwalior',
        'Kanpur', 'Kota', 'Ghaziabad', 'Howrah', 'Mysore', 'Raipur',
        'Dhanbad', 'Patna', 'Jodhpur', 'Solapur', 'Pune', 'Coimbatore',
        'Bhopal', 'Guwahati', 'Faridabad', 'Nagpur', 'Indore', 'Mumbai',
        'Aurangabad', 'Agra', 'Lucknow', 'Visakhapatnam', 'Vadodara',
        'Allahabad', 'Nashik', 'Rajkot', 'Ranchi', 'Kharghar',
        'Kapurthala', 'Pimpri Chinchwad', 'Kolhapur', 'Bhilwara',
        'Gurgaon', 'Udaipur', 'Jamshedpur', 'Jammu', 'Tiruchirappalli',
        'Guntur', 'Bareilly', 'Noida', 'Mangalagiri', 'Barshi',
        'Chandrapur', 'Chhindwara', 'Rajahmundry', 'Sant Kabir Nagar',
        'North 24 Parganas', 'Jalandhar', 'Ajmer', 'Satara', 'Guna',
        'Krishna', 'Anantnag', 'Rohtak', 'Muzaffarpur', 'Etawah', 'Nagaon',
        'Hazaribagh', 'Sangli', 'Anand', 'Ballabhgarh'], dtype=object)
In [1044]:
main.columns[120]
Out[1044]:
'option'
In [1045]:
main["option"]
Out[1045]:
0          NO
1         YES
2         YES
3         YES
4         YES
          ... 
160026     NO
160027     NO
160028     NO
160029    YES
160030    YES
Name: option, Length: 160031, dtype: object
In [1046]:
main["option"].unique()
Out[1046]:
array(['NO', 'YES'], dtype=object)
In [1047]:
plt.figure(figsize=(15,5))
sns.countplot(main["option"])
Out[1047]:
<AxesSubplot:xlabel='option', ylabel='count'>
In [1048]:
main.columns[122]
Out[1048]:
'City Mileage'
In [1049]:
main["City Mileage"]
Out[1049]:
0         20.0 kmpl
1               NaN
2         13.0 kmpl
3         19.0 kmpl
4         17.0 kmpl
            ...    
160026          NaN
160027          NaN
160028          NaN
160029          NaN
160030          NaN
Name: City Mileage, Length: 160031, dtype: object
In [1050]:
main["City Mileage"].unique()
Out[1050]:
array(['20.0 kmpl', nan, '13.0 kmpl', '19.0 kmpl', '17.0 kmpl',
        '19.02 kmpl', '21.0 kmpl', '25.0 km/kg', '14.65 kmpl', '16.0 kmpl',
        '14.0 kmpl', '23.0 kmpl', '16.5 kmpl', '32.0 km/kg', '26.0 kmpl',
        '29.0 km/kg', '12.3 kmpl', '13.1 kmpl', '10.1 kmpl', '10.5 kmpl',
        '9.5 kmpl', '17.5 kmpl', '16.49 kmpl', '11.6 kmpl', '14.9 kmpl',
        '10.0 kmpl', '19.42 kmpl', '13.5 kmpl', '4.6 kmpl', '11.0 kmpl',
        '8.2 kmpl', '10.1 km/kg', '9.3 kmpl', '21.2 kmpl', '10.2 kmpl',
        '20.2 kmpl', '13.7 kmpl', '13.84 kmpl', '7.7 kmpl', '12.14 kmpl',
        '12.0 kmpl', '14.68 kmpl', '22.4 kmpl', '9.62 kmpl', '13.48 kmpl',
        '11.5 kmpl', '16.28 kmpl', '13.86 kmpl', '15.32 kmpl', '18.0 kmpl',
        '24.0 km/kg', '12.6 kmpl', '15.0 kmpl', '12.57 kmpl', '13.78 kmpl',
        '22.0 km/kg', '12.06 kmpl', '11.51 kmpl', '12.24 kmpl',
        '3.22 kmpl', '8.0 kmpl', '15.64 kmpl', '12.08 kmpl', '9.0 kmpl',
        '17.19 kmpl', '15.4 kmpl', '20.37 kmpl', '10.24 kmpl', '6.89 kmpl',
        '9.6 kmpl', '6.5 kmpl', '7.6 kmpl', '11.0 km/kg', '11.4 kmpl',
        '10.8 kmpl', '8.25 kmpl', '13.6 kmpl', '5.7 kmpl', '13.04 kmpl',
        '14.14 kmpl', '12.99 kmpl', '13.41 kmpl', '14.42 kmpl',
        '14.03 kmpl', '18.5 kmpl', '26.93 km/kg', '16.8 kmpl',
        '14.17 kmpl', '26.0 km/kg', '16.48 kmpl', '23.0 km/kg',
        '16.94 kmpl', '17.35 kmpl', '10.52 kmpl', '11.54 kmpl',
        '11.96 kmpl', '13.47 kmpl', '12.12 kmpl', '13.64 kmpl'],
      dtype=object)
In [1051]:
def preprocess_City_Mileage(text):
    text=str(text)
    if text=="nan":
        return np.nan
    varient="".join(re.findall("[a-zA-Z\/]",text))
    if varient=="kmpl":
        text=float(text.replace(varient,""))
    elif varient=="km/kg":
        text=float(text.replace(varient,""))*1.40
    return text    
In [1052]:
main["City Mileage"]=main["City Mileage"].apply(preprocess_City_Mileage)
In [1053]:
main.rename(columns={"City Mileage":"City Mileage(Km/L)"},inplace=True)
In [1054]:
main["City Mileage(Km/L)"]
Out[1054]:
0         20.0
1          NaN
2         13.0
3         19.0
4         17.0
          ... 
160026     NaN
160027     NaN
160028     NaN
160029     NaN
160030     NaN
Name: City Mileage(Km/L), Length: 160031, dtype: float64
In [1055]:
main.columns[123]
Out[1055]:
'Turning Radius (Metres)'
In [1056]:
main["Turning Radius (Metres)"]
Out[1056]:
0         4.8
1         4.5
2         5.5
3         4.8
4         4.5
          ... 
160026    NaN
160027    NaN
160028    NaN
160029    NaN
160030    NaN
Name: Turning Radius (Metres), Length: 160031, dtype: object
In [407]:
def preprocess_turing_radius(text):
    text=str(text)
    if text=='nan':
        return np.nan
    text=text.title()
    if "Mm" in text:
        float_=float(text.replace("".join(re.findall('[a-zA-Z]',text)),""))
        return "{} Metres".format(float_/1000)
    if "Meters" in text:
        text=text.replace("Meters","Metres")
    if "Meter" in text:
        text=text.replace("Meter","Metres")
    if 'M' in text and "Metres" not in text:
        text=text.replace("M"," Metres")
    if "Metres" not in text:
        text=text+" Metres"
    if text[text.find('M')-1]!=" ":
        text=text[:text.find('M')]+" "+text[text.find('M'):]
    return text
In [408]:
main["Turning Radius (Metres)"]=main["Turning Radius (Metres)"].apply(preprocess_turing_radius)
In [1057]:
main.columns[124]
Out[1057]:
'Vanity Mirror'
In [1058]:
main["Vanity Mirror"]
Out[1058]:
0         YES
1         NaN
2         YES
3         YES
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Vanity Mirror, Length: 160031, dtype: object
In [1059]:
main["Vanity Mirror"].unique()
Out[1059]:
array(['YES', nan, 'NO', '5', '4', '2 Zone', '2765', '3 Zone'],
      dtype=object)
In [1060]:
plt.figure(figsize=(15,5))
sns.countplot(main["Vanity Mirror"])
Out[1060]:
<AxesSubplot:xlabel='Vanity Mirror', ylabel='count'>
In [1061]:
main["Vanity Mirror"]=main["Vanity Mirror"].apply(lambda x:preprocess_to_null_out(x,False))
In [1062]:
plt.figure(figsize=(15,5))
sns.countplot(main["Vanity Mirror"])
Out[1062]:
<AxesSubplot:xlabel='Vanity Mirror', ylabel='count'>
In [1063]:
main.columns[125]
Out[1063]:
'Navigation System'
In [1064]:
main["Navigation System"]
Out[1064]:
0         YES
1         NaN
2         YES
3         YES
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Navigation System, Length: 160031, dtype: object
In [1065]:
main["Navigation System"].unique()
Out[1065]:
array(['YES', nan, 'NO', '4 Zone'], dtype=object)
In [1066]:
plt.figure(figsize=(15,5))
sns.countplot(main["Navigation System"])
Out[1066]:
<AxesSubplot:xlabel='Navigation System', ylabel='count'>
In [1067]:
main["Navigation System"]=main["Navigation System"].apply(lambda x:preprocess_to_null_out(x,False))
In [1068]:
plt.figure(figsize=(15,5))
sns.countplot(main["Navigation System"])
Out[1068]:
<AxesSubplot:xlabel='Navigation System', ylabel='count'>
In [1069]:
main.columns[126]
Out[1069]:
'Outside Temperature Display'
In [1070]:
main["Outside Temperature Display"]
Out[1070]:
0         YES
1         NaN
2         YES
3         YES
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Outside Temperature Display, Length: 160031, dtype: object
In [1071]:
main["Outside Temperature Display"].unique()
Out[1071]:
array(['YES', nan, 'NO'], dtype=object)
In [1072]:
plt.figure(figsize=(15,5))
sns.countplot(main["Outside Temperature Display"])
Out[1072]:
<AxesSubplot:xlabel='Outside Temperature Display', ylabel='count'>
In [1073]:
main.columns[127]
Out[1073]:
'Manually Adjustable Ext. Rear View Mirror'
In [1074]:
main["Manually Adjustable Ext. Rear View Mirror"]
Out[1074]:
0          NO
1         YES
2         NaN
3          NO
4         YES
          ... 
160026    NaN
160027    NaN
160028     NO
160029    NaN
160030    NaN
Name: Manually Adjustable Ext. Rear View Mirror, Length: 160031, dtype: object
In [1075]:
main["Manually Adjustable Ext. Rear View Mirror"].unique()
Out[1075]:
array(['NO', 'YES', nan], dtype=object)
In [1076]:
plt.figure(figsize=(15,5))
sns.countplot(main["Manually Adjustable Ext. Rear View Mirror"])
Out[1076]:
<AxesSubplot:xlabel='Manually Adjustable Ext. Rear View Mirror', ylabel='count'>
In [1077]:
main.columns[128]
Out[1077]:
'Power Antenna'
In [1078]:
main["Power Antenna"]
Out[1078]:
0         YES
1         NaN
2         NaN
3         YES
4          NO
          ... 
160026    NaN
160027    NaN
160028     NO
160029    NaN
160030    NaN
Name: Power Antenna, Length: 160031, dtype: object
In [1079]:
main["Power Antenna"].unique()
Out[1079]:
array(['YES', nan, 'NO'], dtype=object)
In [1080]:
plt.figure(figsize=(15,5))
sns.countplot(main["Power Antenna"])
Out[1080]:
<AxesSubplot:xlabel='Power Antenna', ylabel='count'>
In [1081]:
main.columns[129]
Out[1081]:
'Brake Assist'
In [1082]:
main["Brake Assist"]
Out[1082]:
0         YES
1         NaN
2         YES
3         YES
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Brake Assist, Length: 160031, dtype: object
In [1083]:
main["Brake Assist"].unique()
Out[1083]:
array(['YES', nan, 'NO'], dtype=object)
In [1084]:
plt.figure(figsize=(15,5))
sns.countplot(main["Brake Assist"])
Out[1084]:
<AxesSubplot:xlabel='Brake Assist', ylabel='count'>
In [1085]:
main.columns[130]
Out[1085]:
'Anti-Theft Alarm'
In [1086]:
main["Anti-Theft Alarm"]
Out[1086]:
0         YES
1         NaN
2         YES
3         YES
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Anti-Theft Alarm, Length: 160031, dtype: object
In [1087]:
main["Anti-Theft Alarm"].unique()
Out[1087]:
array(['YES', nan, 'NO'], dtype=object)
In [1088]:
plt.figure(figsize=(15,5))
sns.countplot(main["Anti-Theft Alarm"])
Out[1088]:
<AxesSubplot:xlabel='Anti-Theft Alarm', ylabel='count'>
In [1089]:
main.columns[131]
Out[1089]:
'Speed Sensing Auto Door Lock'
In [1090]:
main["Speed Sensing Auto Door Lock"]
Out[1090]:
0         YES
1         NaN
2         NaN
3         YES
4          NO
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Speed Sensing Auto Door Lock, Length: 160031, dtype: object
In [1091]:
main["Speed Sensing Auto Door Lock"].unique()
Out[1091]:
array(['YES', nan, 'NO'], dtype=object)
In [1092]:
plt.figure(figsize=(15,5))
sns.countplot(main["Speed Sensing Auto Door Lock"])
Out[1092]:
<AxesSubplot:xlabel='Speed Sensing Auto Door Lock', ylabel='count'>
In [1093]:
main.columns[132]
Out[1093]:
'Chrome Garnish'
In [1094]:
main["Chrome Garnish"]
Out[1094]:
0         YES
1         NaN
2         NaN
3         YES
4         NaN
          ... 
160026    NaN
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Chrome Garnish, Length: 160031, dtype: object
In [1095]:
main["Chrome Garnish"].unique()
Out[1095]:
array(['YES', nan, 'NO'], dtype=object)
In [1096]:
plt.figure(figsize=(15,5))
sns.countplot(main["Chrome Garnish"])
Out[1096]:
<AxesSubplot:xlabel='Chrome Garnish', ylabel='count'>
In [1097]:
main.columns[133]
Out[1097]:
'Side Impact Beams'
In [1098]:
main["Side Impact Beams"]
Out[1098]:
0         NaN
1         YES
2         NaN
3         NaN
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Side Impact Beams, Length: 160031, dtype: object
In [1099]:
main["Side Impact Beams"].unique()
Out[1099]:
array([nan, 'YES', 'NO'], dtype=object)
In [1100]:
plt.figure(figsize=(15,5))
sns.countplot(main["Side Impact Beams"])
Out[1100]:
<AxesSubplot:xlabel='Side Impact Beams', ylabel='count'>
In [1101]:
main.columns[134]
Out[1101]:
'Drive Type'
In [1102]:
main["Drive Type"]
Out[1102]:
0         NaN
1         NaN
2         FWD
3         NaN
4         FWD
          ... 
160026    AWD
160027    NaN
160028    FWD
160029    NaN
160030    NaN
Name: Drive Type, Length: 160031, dtype: object
In [1103]:
main["Drive Type"].unique()
Out[1103]:
array([nan, 'FWD', '2WD', 'Front Wheel Drive', 'RWD', '2wd', '4WD', 'AWD',
        'Rear wheels', '4x2', '4x4', '4X4', 'RWD(with MTT)',
        'All Wheel Drive'], dtype=object)
In [1104]:
def preprocess_Drive_Type(text):
    text=str(text)
    if text=="nan":
        return np.nan
    text=text.title()
    return text
In [1105]:
main["Drive Type"]=main["Drive Type"].apply(preprocess_Drive_Type)
In [1106]:
main["Drive Type"].unique()
Out[1106]:
array([nan, 'Fwd', '2Wd', 'Front Wheel Drive', 'Rwd', '4Wd', 'Awd',
        'Rear Wheels', '4X2', '4X4', 'Rwd(With Mtt)', 'All Wheel Drive'],
      dtype=object)
In [1107]:
plt.figure(figsize=(15,5))
sns.countplot(main["Drive Type"])
Out[1107]:
<AxesSubplot:xlabel='Drive Type', ylabel='count'>
In [1108]:
main.columns[135]
Out[1108]:
'Rear Reading Lamp'
In [1109]:
main["Rear Reading Lamp"]
Out[1109]:
0         NaN
1         NaN
2         NaN
3         NaN
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Rear Reading Lamp, Length: 160031, dtype: object
In [1110]:
main["Rear Reading Lamp"].unique()
Out[1110]:
array([nan, 'NO', 'YES', '5', '4', '2765', '3 Zone'], dtype=object)
In [1111]:
plt.figure(figsize=(15,5))
sns.countplot(main["Rear Reading Lamp"])
Out[1111]:
<AxesSubplot:xlabel='Rear Reading Lamp', ylabel='count'>
In [1112]:
main["Rear Reading Lamp"]=main["Rear Reading Lamp"].apply(lambda x:preprocess_to_null_out(x,False))
In [1113]:
plt.figure(figsize=(15,5))
sns.countplot(main["Rear Reading Lamp"])
Out[1113]:
<AxesSubplot:xlabel='Rear Reading Lamp', ylabel='count'>
In [1114]:
main.columns[136]
Out[1114]:
'Cup Holders-Front'
In [1115]:
main["Cup Holders-Front"]
Out[1115]:
0         NaN
1         NaN
2         NaN
3         NaN
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Cup Holders-Front, Length: 160031, dtype: object
In [1116]:
main["Cup Holders-Front"].unique()
Out[1116]:
array([nan, 'YES', 'NO'], dtype=object)
In [1117]:
plt.figure(figsize=(15,5))
sns.countplot(main["Cup Holders-Front"])
Out[1117]:
<AxesSubplot:xlabel='Cup Holders-Front', ylabel='count'>
In [1118]:
main.columns[137]
Out[1118]:
'Leather Seats'
In [1119]:
main["Leather Seats"]
Out[1119]:
0         NaN
1         NaN
2          NO
3         NaN
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Leather Seats, Length: 160031, dtype: object
In [1120]:
main["Leather Seats"].unique()
Out[1120]:
array([nan, 'NO', 'YES', '6', '2'], dtype=object)
In [1121]:
plt.figure(figsize=(15,5))
sns.countplot(main["Leather Seats"])
Out[1121]:
<AxesSubplot:xlabel='Leather Seats', ylabel='count'>
In [1122]:
main["Leather Seats"]=main["Leather Seats"].apply(lambda x:preprocess_to_null_out(x,False))
In [1123]:
plt.figure(figsize=(15,5))
sns.countplot(main["Leather Seats"])
Out[1123]:
<AxesSubplot:xlabel='Leather Seats', ylabel='count'>
In [1124]:
main.columns[138]
Out[1124]:
'Driving Experience Control Eco'
In [1125]:
main["Driving Experience Control Eco"]
Out[1125]:
0         NaN
1         NaN
2         NaN
3         NaN
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Driving Experience Control Eco, Length: 160031, dtype: object
In [1126]:
main["Driving Experience Control Eco"].unique()
Out[1126]:
array([nan, 'YES', 'NO'], dtype=object)
In [1127]:
plt.figure(figsize=(15,5))
sns.countplot(main["Driving Experience Control Eco"])
Out[1127]:
<AxesSubplot:xlabel='Driving Experience Control Eco', ylabel='count'>
In [1128]:
main.columns[139]
Out[1128]:
'Ventilated Seats'
In [1129]:
main["Ventilated Seats"]
Out[1129]:
0         NaN
1         NaN
2         NaN
3         NaN
4         NaN
          ... 
160026    NaN
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Ventilated Seats, Length: 160031, dtype: object
In [1130]:
main["Ventilated Seats"].unique()
Out[1130]:
array([nan, 'YES', 'NO'], dtype=object)
In [1131]:
plt.figure(figsize=(15,5))
sns.countplot(main["Ventilated Seats"])
Out[1131]:
<AxesSubplot:xlabel='Ventilated Seats', ylabel='count'>
In [1132]:
main.columns[140]
Out[1132]:
'Tyre Pressure Monitor'
In [1133]:
main["Tyre Pressure Monitor"]
Out[1133]:
0         NaN
1         NaN
2         NaN
3         NaN
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Tyre Pressure Monitor, Length: 160031, dtype: object
In [1134]:
main["Tyre Pressure Monitor"].unique()
Out[1134]:
array([nan, 'YES', 'NO', 'Tubeless,Radial'], dtype=object)
In [1135]:
plt.figure(figsize=(15,5))
sns.countplot(main["Tyre Pressure Monitor"])
Out[1135]:
<AxesSubplot:xlabel='Tyre Pressure Monitor', ylabel='count'>
In [1136]:
main["Tyre Pressure Monitor"]=main["Tyre Pressure Monitor"].apply(lambda x:preprocess_to_null_out(x,False))
In [1137]:
plt.figure(figsize=(15,5))
sns.countplot(main["Tyre Pressure Monitor"])
Out[1137]:
<AxesSubplot:xlabel='Tyre Pressure Monitor', ylabel='count'>
In [1138]:
main.columns[141]
Out[1138]:
'Rain Sensing Wiper'
In [1139]:
main["Rain Sensing Wiper"]
Out[1139]:
0         NaN
1         NaN
2          NO
3         NaN
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Rain Sensing Wiper, Length: 160031, dtype: object
In [1140]:
main["Rain Sensing Wiper"].unique()
Out[1140]:
array([nan, 'NO', 'YES'], dtype=object)
In [1141]:
plt.figure(figsize=(15,5))
sns.countplot(main["Rain Sensing Wiper"])
Out[1141]:
<AxesSubplot:xlabel='Rain Sensing Wiper', ylabel='count'>
In [1142]:
main.columns[142]
Out[1142]:
'Turbo Charger'
In [1143]:
main["Turbo Charger"]
Out[1143]:
0         NaN
1         NaN
2         NaN
3         NaN
4         NaN
          ... 
160026    NaN
160027    NaN
160028    Yes
160029    NaN
160030    NaN
Name: Turbo Charger, Length: 160031, dtype: object
In [1144]:
main["Turbo Charger"].unique()
Out[1144]:
array([nan, 'Yes', 'No', 'Twin', 'twin', 'Turbo', 'TWIN', 'YES'],
      dtype=object)
In [1145]:
plt.figure(figsize=(15,5))
sns.countplot(main["Turbo Charger"])
Out[1145]:
<AxesSubplot:xlabel='Turbo Charger', ylabel='count'>
In [1146]:
def preprocess_Turbo_Charger(text):
    text=str(text)
    if text=="nan":
        return np.nan
    text=text.title()
    if "Twin" in text or "Turbo" in text:
        return "YES"
    text=text.upper()
    return text
In [1147]:
main["Turbo Charger"]=main["Turbo Charger"].apply(preprocess_Turbo_Charger)
In [1148]:
main["Turbo Charger"].unique()
Out[1148]:
array([nan, 'YES', 'NO'], dtype=object)
In [1149]:
plt.figure(figsize=(15,5))
sns.countplot(main["Turbo Charger"])
Out[1149]:
<AxesSubplot:xlabel='Turbo Charger', ylabel='count'>
In [1150]:
main.columns[143]
Out[1150]:
'Air Quality Control'
In [1151]:
main["Air Quality Control"]
Out[1151]:
0         NaN
1         NaN
2         NaN
3         NaN
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Air Quality Control, Length: 160031, dtype: object
In [1152]:
main["Air Quality Control"].unique()
Out[1152]:
array([nan, 'NO', 'YES', '5', '4', '2923', '4 Zone'], dtype=object)
In [1153]:
plt.figure(figsize=(15,5))
sns.countplot(main["Air Quality Control"])
Out[1153]:
<AxesSubplot:xlabel='Air Quality Control', ylabel='count'>
In [1154]:
main["Air Quality Control"]=main["Air Quality Control"].apply(lambda x:preprocess_to_null_out(x,False))
In [1155]:
plt.figure(figsize=(15,5))
sns.countplot(main["Air Quality Control"])
Out[1155]:
<AxesSubplot:xlabel='Air Quality Control', ylabel='count'>
In [1156]:
main.columns[144]
Out[1156]:
'Traction Control'
In [1157]:
main["Traction Control"]
Out[1157]:
0         NaN
1         NaN
2         NaN
3         NaN
4         NaN
          ... 
160026    NaN
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Traction Control, Length: 160031, dtype: object
In [1158]:
main["Traction Control"].unique()
Out[1158]:
array([nan, 'YES', 'NO'], dtype=object)
In [1159]:
plt.figure(figsize=(15,5))
sns.countplot(main["Traction Control"])
Out[1159]:
<AxesSubplot:xlabel='Traction Control', ylabel='count'>
In [1160]:
main.columns[145]
Out[1160]:
'Vehicle Stability Control System'
In [1161]:
main["Vehicle Stability Control System"]
Out[1161]:
0         NaN
1         NaN
2         NaN
3         NaN
4         NaN
          ... 
160026    YES
160027    NaN
160028    YES
160029    NaN
160030    NaN
Name: Vehicle Stability Control System, Length: 160031, dtype: object
In [1162]:
main["Vehicle Stability Control System"].unique()
Out[1162]:
array([nan, 'YES', 'NO'], dtype=object)
In [1163]:
plt.figure(figsize=(15,5))
sns.countplot(main["Vehicle Stability Control System"])
Out[1163]:
<AxesSubplot:xlabel='Vehicle Stability Control System', ylabel='count'>
In [4]:
main["Low Fuel Warning Light"]
Out[4]:
0            YES
1            NaN
2            YES
3            YES
4             NO
            ...  
105797       YES
105798    4 Zone
105799       YES
105800       YES
105801       YES
Name: Low Fuel Warning Light, Length: 105802, dtype: object
In [5]:
main["Low Fuel Warning Light"].unique()
Out[5]:
array(['YES', nan, 'NO', '5', '4', '3 Zone', '2765', '4 Zone'],
      dtype=object)
In [6]:
plt.figure(figsize=(15,5))
sns.countplot(main["Low Fuel Warning Light"])
Out[6]:
<AxesSubplot:xlabel='Low Fuel Warning Light', ylabel='count'>
In [7]:
main["Low Fuel Warning Light"]=main["Low Fuel Warning Light"].apply(lambda x:preprocess_to_null_out(x,False))
In [10]:
main["Accessory Power Outlet"]
Out[10]:
0            YES
1            YES
2            NaN
3            YES
4             NO
            ...  
105797       YES
105798    4 Zone
105799       YES
105800       YES
105801       YES
Name: Accessory Power Outlet, Length: 105802, dtype: object
In [11]:
main["Accessory Power Outlet"].unique()
Out[11]:
array(['YES', nan, 'NO', '5', '4', '2 Zone', '3 Zone', '2765', '4 Zone'],
      dtype=object)
In [12]:
plt.figure(figsize=(15,5))
sns.countplot(main["Accessory Power Outlet"])
Out[12]:
<AxesSubplot:xlabel='Accessory Power Outlet', ylabel='count'>
In [13]:
main["Accessory Power Outlet"]=main["Accessory Power Outlet"].apply(lambda x:preprocess_to_null_out(x,False))
In [14]:
main["Trunk Light"]
Out[14]:
0         YES
1         NaN
2         NaN
3         YES
4         NaN
          ... 
105797    YES
105798    YES
105799    YES
105800    YES
105801    YES
Name: Trunk Light, Length: 105802, dtype: object
In [15]:
main["Trunk Light"].unique()
Out[15]:
array(['YES', nan, 'NO', '5', '4', '2 Zone', '3 Zone', '2765'],
      dtype=object)
In [16]:
plt.figure(figsize=(15,5))
sns.countplot(main["Trunk Light"])
Out[16]:
<AxesSubplot:xlabel='Trunk Light', ylabel='count'>
In [17]:
main["Trunk Light"]=main["Trunk Light"].apply(lambda x:preprocess_to_null_out(x,False))
In [18]:
main["Rear Seat Headrest"]
Out[18]:
0         YES
1         YES
2         YES
3         YES
4         YES
          ... 
105797    YES
105798    YES
105799    YES
105800    YES
105801    YES
Name: Rear Seat Headrest, Length: 105802, dtype: object
In [19]:
main["Rear Seat Headrest"].unique()
Out[19]:
array(['YES', nan, 'NO', '5', '4', '2765', '3 Zone'], dtype=object)
In [20]:
plt.figure(figsize=(15,5))
sns.countplot(main["Rear Seat Headrest"])
Out[20]:
<AxesSubplot:xlabel='Rear Seat Headrest', ylabel='count'>
In [21]:
main["Rear Seat Headrest"]=main["Rear Seat Headrest"].apply(lambda x:preprocess_to_null_out(x,False))
In [1167]:
#Drop duplicates rows again beacuse of place column
main=main.drop_duplicates()
In [1168]:
#Displacement (cc) and Engine Displacement (cc) column are same
main.drop(["Displacement (cc)"],axis=1,inplace=True)
In [1169]:
# No. of cylinder and Valves Per Cylinder column are same
main.drop(["Valves Per Cylinder"],axis=1,inplace=True)
In [1170]:
# ARAI Mileage and Petrol Mileage (ARAI) column are same
main.drop(["Petrol Mileage (ARAI)"],axis=1,inplace=True)
In [3]:
main.drop(["Petrol Fuel Tank Capacity (Litres)"],axis=1,inplace=True)
main.drop(["Power Windows-Rear"],axis=1,inplace=True)
main.drop(["Power Windows-Front"],axis=1,inplace=True)
In [ ]:
main.to_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//Preprocessed_datas.csv")

Missing Imputation¶

  All the Missing values in the columns are based on the MCAR(Missing completely at random),so i am using the MCAR tecniques like
    1.Dropping Features
    2.Mean,Mode,Medium
    3.End of tail distribution
    4.Minimum imputation 
    5.Maximum imputation
  Here we are dealing with MCAR so imputation is done independently on the each Brand then only we can make correct guess
In [40]:
main.isnull().sum()
Out[40]:
Model                     0
Brand                     0
Varient                   0
ARAI Mileage(Km/L)    24974
                      ...  
BHP                    4455
RPM                    4455
NM                     4555
NM_RPM                 4555
Length: 147, dtype: int64
In [44]:
ms.bar(main)
Out[44]:
<AxesSubplot:>
In [45]:
pd.set_option("display.max_rows",8)
In [46]:
calculate_missing_percentage(main)
Out[46]:
Fearure Percentage
0 Model 0.000000
1 Brand 0.000000
2 Varient 0.000000
3 ARAI Mileage(Km/L) 0.236045
... ... ...
143 BHP 0.042107
144 RPM 0.042107
145 NM 0.043052
146 NM_RPM 0.043052

147 rows × 2 columns

In [23]:
main=main.sort_values(by="Brand")
In [24]:
Filled_DataFrame=pd.DataFrame()
In [25]:
def fill_values_for_numericals(models,brands,column,enabler):
    global Filled_DataFrame
    entire=main["{}".format(column)]
    entire=entire.loc[entire.isnull()==False]
    com=main.loc[(main["Model"]=="{}".format(models))]["{}".format(column)]
    com=com.loc[com.isnull()==False]
    com1=main.loc[(main["Model"]=="{}".format(models))&(main["Brand"]=="{}".format(brands))][["Brand","{}".format(column)]]
    computed_column=com1["{}".format(column)]
    if computed_column.isnull().any()==True and computed_column.isnull().all()==True:
        if com.isnull().all()==True:
            if enabler==True:
                temp_=entire.apply(lambda x:str(x))
                com1["{}".format(column)].fillna(float(temp_.mode()),inplace=True)
                Filled_DataFrame=pd.concat([Filled_DataFrame,com1],axis=0)
            else:
                skew_value=entire.skew()
                if skew_value<-0.5 or skew_value>0.5:
                    com1["{}".format(column)].fillna(entire.median(),inplace=True)
                    Filled_DataFrame=pd.concat([Filled_DataFrame,com1],axis=0)
                else:
                    com1["{}".format(column)].fillna(entire.mean(),inplace=True)
                    Filled_DataFrame=pd.concat([Filled_DataFrame,com1],axis=0)
        else:
            if enabler==True:
                temp_=com.apply(lambda x:str(x))
                com1["{}".format(column)].fillna(float(temp_.mode()),inplace=True)
                Filled_DataFrame=pd.concat([Filled_DataFrame,com1],axis=0)
            else:
                skew_value=com.skew()
                if skew_value<-0.5 or skew_value>0.5:
                    com1["{}".format(column)].fillna(com.median(),inplace=True)
                    Filled_DataFrame=pd.concat([Filled_DataFrame,com1],axis=0)
                else:
                    com1["{}".format(column)].fillna(com.mean(),inplace=True)
                    Filled_DataFrame=pd.concat([Filled_DataFrame,com1],axis=0)
    else:
        com1_without_null=computed_column.loc[computed_column.isnull()==False]
        if enabler==True:
            temp_=com1_without_null.apply(lambda x:str(x))
            com1["{}".format(column)].fillna(float(temp_.mode()[0]),inplace=True)
            Filled_DataFrame=pd.concat([Filled_DataFrame,com1],axis=0)
        else:
            skew_value=com1_without_null.skew()
            if skew_value<-0.5 or skew_value>0.5:
                com1["{}".format(column)].fillna(com1_without_null.median(),inplace=True)
                Filled_DataFrame=pd.concat([Filled_DataFrame,com1],axis=0)
            else:
                com1["{}".format(column)].fillna(com1_without_null.mean(),inplace=True)
                Filled_DataFrame=pd.concat([Filled_DataFrame,com1],axis=0)
In [26]:
def prepare_data_for_create_dataframe(data):
    models=[]
    brands=[]
    for i in data:
        models.append(i[0])
        brands.append(i[1])
    return pd.DataFrame({
        "Model":models,
        "Brand":brands
    })    
In [27]:
non_repitation_data=list(set(zip(main[["Model","Brand"]]["Model"],main[["Model","Brand"]]["Brand"])))
In [28]:
prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand")
Out[28]:
Model Brand
59 BMW 2 Series
94 Ferrari 296 GTB
211 BMW 3 Series
58 BMW 5 Series
228 BMW 6 Series
... ... ...
98 Audi e-tron GT
79 Hyundai i20
56 Hyundai i20 N Line
113 BMW i4
217 BMW iX

248 rows × 2 columns

In [29]:
len(non_repitation_data)
Out[29]:
248
In [30]:
len(main["Brand"].unique())
Out[30]:
248
In [31]:
main.shape
Out[31]:
(105802, 148)
In [32]:
Filled_DataFrame=pd.DataFrame()
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_values_for_numericals(x["Model"],x["Brand"],"ARAI Mileage(Km/L)",False),axis=1)
In [33]:
main[["Model","Brand","ARAI Mileage(Km/L)"]].iloc[100]
Out[33]:
Model                      BMW
Brand                 2 Series
ARAI Mileage(Km/L)       14.82
Name: 28373, dtype: object
In [34]:
Filled_DataFrame["ARAI Mileage(Km/L)"].isnull().sum()
Out[34]:
0
In [35]:
main["ARAI Mileage(Km/L)"]=Filled_DataFrame["ARAI Mileage(Km/L)"]
In [36]:
Filled_DataFrame[["Brand","ARAI Mileage(Km/L)"]].iloc[100]
Out[36]:
Brand                 2 Series
ARAI Mileage(Km/L)       14.82
Name: 28373, dtype: object
In [37]:
main["ARAI Mileage(Km/L)"].isnull().sum()
Out[37]:
0
In [38]:
# Engine Displacement (cc)
Filled_DataFrame=pd.DataFrame()
main["Engine Displacement (cc)"].isnull().sum()
Out[38]:
6703
In [39]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_values_for_numericals(x["Model"],x["Brand"],"Engine Displacement (cc)",False),axis=1)
In [40]:
main[["Model","Brand","Engine Displacement (cc)"]].iloc[101921]
Out[40]:
Model                       Mahindra
Brand                         XUV700
Engine Displacement (cc)      2198.0
Name: 61152, dtype: object
In [41]:
Filled_DataFrame["Engine Displacement (cc)"].isnull().sum()
Out[41]:
0
In [42]:
main["Engine Displacement (cc)"]=Filled_DataFrame["Engine Displacement (cc)"]
In [43]:
Filled_DataFrame[["Brand","Engine Displacement (cc)"]].iloc[101921]
Out[43]:
Brand                       XUV700
Engine Displacement (cc)    2198.0
Name: 61152, dtype: object
In [44]:
main["Engine Displacement (cc)"].isnull().sum()
Out[44]:
0
In [45]:
main.columns[12]
Out[45]:
'Fuel Tank Capacity'
In [46]:
# Boot Space (Litres)
Filled_DataFrame=pd.DataFrame()
main["Fuel Tank Capacity"]
Out[46]:
26993    51.0
27910    51.0
27903    51.0
28435    50.0
28120    50.0
          ... 
27057     NaN
27583     NaN
27572     NaN
26558     NaN
26874     NaN
Name: Fuel Tank Capacity, Length: 105802, dtype: float64
In [47]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_values_for_numericals(x["Model"],x["Brand"],"Fuel Tank Capacity",False),axis=1)
In [48]:
main[["Model","Brand","Fuel Tank Capacity"]].iloc[79190]
Out[48]:
Model                  Mahindra
Brand                 Scorpio-N
Fuel Tank Capacity          NaN
Name: 60831, dtype: object
In [49]:
Filled_DataFrame["Fuel Tank Capacity"].isnull().sum()
Out[49]:
0
In [50]:
main["Fuel Tank Capacity"]=Filled_DataFrame["Fuel Tank Capacity"]
In [51]:
Filled_DataFrame[["Brand","Fuel Tank Capacity"]].iloc[79190]
Out[51]:
Brand                 Scorpio-N
Fuel Tank Capacity    52.378078
Name: 60831, dtype: object
In [52]:
main["Fuel Tank Capacity"].isnull().sum()
Out[52]:
0
In [59]:
main["Fuel Tank Capacity"].isnull().sum()
Out[59]:
0
In [60]:
# Length (mm)
Filled_DataFrame=pd.DataFrame()
main["Length (mm)"]
Out[60]:
26993    4526.0
27910    4526.0
27903    4526.0
28435    4526.0
28120    4526.0
          ...  
27057    4953.0
27583    4953.0
27572    4953.0
26558    4953.0
26874    4953.0
Name: Length (mm), Length: 105802, dtype: float64
In [61]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_values_for_numericals(x["Model"],x["Brand"],"Length (mm)",False),axis=1)
In [62]:
main[["Model","Brand","Length (mm)"]].iloc[79190]
Out[62]:
Model           Mahindra
Brand          Scorpio-N
Length (mm)       4662.0
Name: 60831, dtype: object
In [63]:
Filled_DataFrame["Length (mm)"].isnull().sum()
Out[63]:
0
In [64]:
main["Length (mm)"]=Filled_DataFrame["Length (mm)"]
In [65]:
Filled_DataFrame[["Brand","Length (mm)"]].iloc[79190]
Out[65]:
Brand          Scorpio-N
Length (mm)       4662.0
Name: 60831, dtype: object
In [66]:
main["Length (mm)"].isnull().sum()
Out[66]:
0
In [67]:
# Width (mm)
Filled_DataFrame=pd.DataFrame()
main["Width (mm)"]
Out[67]:
26993    2081.0
27910    2081.0
27903    2081.0
28435    2081.0
28120    2081.0
          ...  
27057    2230.0
27583    2230.0
27572    2230.0
26558    2230.0
26874    2230.0
Name: Width (mm), Length: 105802, dtype: float64
In [68]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_values_for_numericals(x["Model"],x["Brand"],"Width (mm)",False),axis=1)
In [69]:
main[["Model","Brand","Width (mm)"]].iloc[79190]
Out[69]:
Model          Mahindra
Brand         Scorpio-N
Width (mm)       1917.0
Name: 60831, dtype: object
In [70]:
Filled_DataFrame["Width (mm)"].isnull().sum()
Out[70]:
0
In [71]:
main["Width (mm)"]=Filled_DataFrame["Width (mm)"]
In [72]:
Filled_DataFrame[["Brand","Width (mm)"]].iloc[79190]
Out[72]:
Brand         Scorpio-N
Width (mm)       1917.0
Name: 60831, dtype: object
In [73]:
# Height (mm)
Filled_DataFrame=pd.DataFrame()
main["Height (mm)"]
Out[73]:
26993    1420.0
27910    1420.0
27903    1420.0
28435    1420.0
28120    1420.0
          ...  
27057    1695.0
27583    1695.0
27572    1695.0
26558    1695.0
26874    1695.0
Name: Height (mm), Length: 105802, dtype: float64
In [74]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_values_for_numericals(x["Model"],x["Brand"],"Height (mm)",False),axis=1)
In [76]:
Filled_DataFrame["Height (mm)"].isnull().sum()
Out[76]:
0
In [77]:
main["Height (mm)"]=Filled_DataFrame["Height (mm)"]
In [79]:
# City Mileage(Km/L)
Filled_DataFrame=pd.DataFrame()
main["City Mileage(Km/L)"]
Out[79]:
26993   NaN
27910   NaN
27903   NaN
28435   NaN
28120   NaN
          ..
27057   NaN
27583   NaN
27572   NaN
26558   NaN
26874   NaN
Name: City Mileage(Km/L), Length: 105802, dtype: float64
In [80]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_values_for_numericals(x["Model"],x["Brand"],"City Mileage(Km/L)",False),axis=1)
In [81]:
main[["Model","Brand","City Mileage(Km/L)"]].iloc[79190]
Out[81]:
Model                  Mahindra
Brand                 Scorpio-N
City Mileage(Km/L)          NaN
Name: 60831, dtype: object
In [82]:
Filled_DataFrame["City Mileage(Km/L)"].isnull().sum()
Out[82]:
0
In [83]:
main["City Mileage(Km/L)"]=Filled_DataFrame["City Mileage(Km/L)"]
In [84]:
Filled_DataFrame[["Brand","City Mileage(Km/L)"]].iloc[79190]
Out[84]:
Brand                 Scorpio-N
City Mileage(Km/L)    13.827467
Name: 60831, dtype: object
In [85]:
Filled_DataFrame=pd.DataFrame()
main["Boot Space (Litres)"]
Out[85]:
26993   NaN
27910   NaN
27903   NaN
28435   NaN
28120   NaN
          ..
27057   NaN
27583   NaN
27572   NaN
26558   NaN
26874   NaN
Name: Boot Space (Litres), Length: 105802, dtype: float64
In [86]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_values_for_numericals(x["Model"],x["Brand"],"Boot Space (Litres)",False),axis=1)
In [87]:
main[["Model","Brand","Boot Space (Litres)"]].iloc[34503]
Out[87]:
Model                    Ford
Brand                  Fiesta
Boot Space (Litres)     430.0
Name: 21257, dtype: object
In [88]:
Filled_DataFrame["Boot Space (Litres)"].isnull().sum()
Out[88]:
0
In [89]:
main["Boot Space (Litres)"]=Filled_DataFrame["Boot Space (Litres)"]
In [90]:
Filled_DataFrame[["Brand","Boot Space (Litres)"]].iloc[34503]
Out[90]:
Brand                  Fiesta
Boot Space (Litres)     430.0
Name: 21257, dtype: object
In [91]:
# BHP
Filled_DataFrame=pd.DataFrame()
main["BHP"]
Out[91]:
26993    187.74
27910    187.74
27903    187.74
28435    189.08
28120    189.08
          ...  
27057    321.84
27583    321.84
27572    321.84
26558    321.84
26874    321.84
Name: BHP, Length: 105802, dtype: float64
In [92]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_values_for_numericals(x["Model"],x["Brand"],"BHP",False),axis=1)
In [93]:
main[["Model","Brand","BHP"]].iloc[79190]
Out[93]:
Model     Mahindra
Brand    Scorpio-N
BHP            NaN
Name: 60831, dtype: object
In [94]:
Filled_DataFrame["BHP"].isnull().sum()
Out[94]:
0
In [95]:
main["BHP"]=Filled_DataFrame["BHP"]
In [96]:
Filled_DataFrame[["Brand","BHP"]].iloc[79190]
Out[96]:
Brand     Scorpio-N
BHP      133.238545
Name: 60831, dtype: object
In [97]:
# RPM
Filled_DataFrame=pd.DataFrame()
main["RPM"]
Out[97]:
26993    4000.0
27910    4000.0
27903    4000.0
28435    5000.0
28120    5000.0
          ...  
27057       0.0
27583       0.0
27572       0.0
26558       0.0
26874       0.0
Name: RPM, Length: 105802, dtype: float64
In [98]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_values_for_numericals(x["Model"],x["Brand"],"RPM",False),axis=1)
In [99]:
main[["Model","Brand","RPM"]].iloc[79190]
Out[99]:
Model     Mahindra
Brand    Scorpio-N
RPM            NaN
Name: 60831, dtype: object
In [100]:
Filled_DataFrame["RPM"].isnull().sum()
Out[100]:
0
In [101]:
main["RPM"]=Filled_DataFrame["RPM"]
In [102]:
Filled_DataFrame[["Brand","RPM"]].iloc[79190]
Out[102]:
Brand    Scorpio-N
RPM         3750.0
Name: 60831, dtype: object
In [103]:
# NM
Filled_DataFrame=pd.DataFrame()
main["NM"]
Out[103]:
26993    400.0
27910    400.0
27903    400.0
28435    280.0
28120    280.0
          ...  
27057    630.0
27583    630.0
27572    630.0
26558    630.0
26874    630.0
Name: NM, Length: 105802, dtype: float64
In [104]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_values_for_numericals(x["Model"],x["Brand"],"NM",False),axis=1)
In [105]:
main[["Model","Brand","NM"]].iloc[79190]
Out[105]:
Model     Mahindra
Brand    Scorpio-N
NM             NaN
Name: 60831, dtype: object
In [106]:
Filled_DataFrame["NM"].isnull().sum()
Out[106]:
0
In [107]:
main["NM"]=Filled_DataFrame["NM"]
In [108]:
Filled_DataFrame[["Brand","NM"]].iloc[79190]
Out[108]:
Brand     Scorpio-N
NM       299.823959
Name: 60831, dtype: object
In [109]:
# NM_RPM
Filled_DataFrame=pd.DataFrame()
main["NM_RPM"]
Out[109]:
26993    2125.0
27910    2125.0
27903    2125.0
28435    2975.0
28120    2975.0
          ...  
27057       0.0
27583       0.0
27572       0.0
26558       0.0
26874       0.0
Name: NM_RPM, Length: 105802, dtype: float64
In [110]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_values_for_numericals(x["Model"],x["Brand"],"NM_RPM",False),axis=1)
In [111]:
main[["Model","Brand","NM_RPM"]].iloc[79190]
Out[111]:
Model      Mahindra
Brand     Scorpio-N
NM_RPM          NaN
Name: 60831, dtype: object
In [112]:
Filled_DataFrame["NM_RPM"].isnull().sum()
Out[112]:
0
In [113]:
main["NM_RPM"]=Filled_DataFrame["NM_RPM"]
In [114]:
Filled_DataFrame[["Brand","NM_RPM"]].iloc[79190]
Out[114]:
Brand     Scorpio-N
NM_RPM       2200.0
Name: 60831, dtype: object
In [115]:
# Seating Capacity
Filled_DataFrame=pd.DataFrame()
main["Seating Capacity"]
Out[115]:
26993    5.0
27910    5.0
27903    5.0
28435    5.0
28120    5.0
        ... 
27057    5.0
27583    5.0
27572    5.0
26558    5.0
26874    5.0
Name: Seating Capacity, Length: 105802, dtype: float64
In [116]:
main["Seating Capacity"].unique()
Out[116]:
array([ 5.,  2.,  4., nan,  6.,  7.,  0.,  8.])
In [117]:
def remove_zero_into_null(text):
    if text==0.0:
        return np.nan
    return text
In [118]:
main["Seating Capacity"]=main["Seating Capacity"].apply(remove_zero_into_null)
In [119]:
main["Seating Capacity"].unique()
Out[119]:
array([ 5.,  2.,  4., nan,  6.,  7.,  8.])
In [120]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_values_for_numericals(x["Model"],x["Brand"],"Seating Capacity",True),axis=1)
In [121]:
main[["Model","Brand","Seating Capacity"]].iloc[34503]
Out[121]:
Model                 Ford
Brand               Fiesta
Seating Capacity       5.0
Name: 21257, dtype: object
In [122]:
Filled_DataFrame["Seating Capacity"].isnull().sum()
Out[122]:
0
In [123]:
main["Seating Capacity"]=Filled_DataFrame["Seating Capacity"]
In [124]:
Filled_DataFrame[["Brand","Seating Capacity"]].iloc[34503]
Out[124]:
Brand               Fiesta
Seating Capacity       5.0
Name: 21257, dtype: object
In [125]:
main["Seating Capacity"].unique()
Out[125]:
array([5., 2., 4., 6., 7., 8.])
In [126]:
Filled_DataFrame=pd.DataFrame()
main["No. of cylinder"]
Out[126]:
26993    4.0
27910    4.0
27903    4.0
28435    4.0
28120    4.0
        ... 
27057    NaN
27583    NaN
27572    NaN
26558    NaN
26874    NaN
Name: No. of cylinder, Length: 105802, dtype: float64
In [127]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_values_for_numericals(x["Model"],x["Brand"],"No. of cylinder",True),axis=1)
In [128]:
main[["Model","Brand","No. of cylinder"]].iloc[34503]
Out[128]:
Model                Ford
Brand              Fiesta
No. of cylinder       4.0
Name: 21257, dtype: object
In [129]:
Filled_DataFrame["No. of cylinder"].isnull().sum()
Out[129]:
0
In [130]:
main["No. of cylinder"]=Filled_DataFrame["No. of cylinder"]
In [131]:
Filled_DataFrame[["Brand","No. of cylinder"]].iloc[345]
Out[131]:
Brand              3 Series
No. of cylinder         4.0
Name: 27951, dtype: object
In [132]:
def fill_catagorical_values(models,brands,column):
    global Filled_DataFrame
    entire=main["{}".format(column)]
    entire=entire.loc[entire.isnull()==False]
    com=main.loc[(main["Model"]=="{}".format(models))]["{}".format(column)]
    com=com.loc[com.isnull()==False]
    com1=main.loc[(main["Model"]=="{}".format(models))&(main["Brand"]=="{}".format(brands))][["Brand","{}".format(column)]]
    computed_column=com1["{}".format(column)]
    if computed_column.isnull().any()==True and computed_column.isnull().all()==True:
        if com.isnull().all()==True:
            com1["{}".format(column)].fillna(entire.mode()[0],inplace=True)
            Filled_DataFrame=pd.concat([Filled_DataFrame,com1],axis=0)
        else:
            com1["{}".format(column)].fillna(com.mode()[0],inplace=True)
            Filled_DataFrame=pd.concat([Filled_DataFrame,com1],axis=0)
    else:
        com1_without_null=computed_column.loc[computed_column.isnull()==False]
        com1["{}".format(column)].fillna(com1_without_null.mode()[0],inplace=True)
        Filled_DataFrame=pd.concat([Filled_DataFrame,com1],axis=0)
In [133]:
Filled_DataFrame=pd.DataFrame()
main["Body Type"]
Out[133]:
26993    Sedan
27910    Sedan
27903    Sedan
28435    Sedan
28120    Sedan
          ...  
27057      SUV
27583      SUV
27572      SUV
26558      SUV
26874      SUV
Name: Body Type, Length: 105802, dtype: object
In [134]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Body Type"),axis=1)
In [135]:
main["Body Type"]=Filled_DataFrame["Body Type"]
In [136]:
# Multi-function Steering Wheel
Filled_DataFrame=pd.DataFrame()
main["Multi-function Steering Wheel"]
Out[136]:
26993    YES
27910    YES
27903    YES
28435    YES
28120    YES
        ... 
27057    YES
27583    YES
27572    YES
26558    YES
26874    YES
Name: Multi-function Steering Wheel, Length: 105802, dtype: object
In [137]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Multi-function Steering Wheel"),axis=1)
In [138]:
main["Multi-function Steering Wheel"]=Filled_DataFrame["Multi-function Steering Wheel"]
In [139]:
# Touch Screen
Filled_DataFrame=pd.DataFrame()
main["Touch Screen"]
Out[139]:
26993    YES
27910    YES
27903    YES
28435    YES
28120    YES
        ... 
27057    YES
27583    YES
27572    YES
26558    YES
26874    YES
Name: Touch Screen, Length: 105802, dtype: object
In [140]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Touch Screen"),axis=1)
In [141]:
main["Touch Screen"]=Filled_DataFrame["Touch Screen"]
In [142]:
# Engine Start Stop Button
Filled_DataFrame=pd.DataFrame()
main["Engine Start Stop Button"].isnull().sum()
Out[142]:
13716
In [143]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Engine Start Stop Button"),axis=1)
In [144]:
main["Engine Start Stop Button"]=Filled_DataFrame["Engine Start Stop Button"]
In [145]:
# Alloy Wheels
Filled_DataFrame=pd.DataFrame()
main["Alloy Wheels"].isnull().sum()
Out[145]:
5901
In [146]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Alloy Wheels"),axis=1)
In [147]:
main["Alloy Wheels"]=Filled_DataFrame["Alloy Wheels"]
In [148]:
# Power Windows Rear
Filled_DataFrame=pd.DataFrame()
main["Power Windows Rear"].isnull().sum()
Out[148]:
5948
In [149]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Power Windows Rear"),axis=1)
In [150]:
main["Power Windows Rear"]=Filled_DataFrame["Power Windows Rear"]
In [151]:
# Wheel Covers
Filled_DataFrame=pd.DataFrame()
main["Wheel Covers"].isnull().sum()
Out[151]:
19999
In [152]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Wheel Covers"),axis=1)
In [153]:
main["Wheel Covers"]=Filled_DataFrame["Wheel Covers"]
In [154]:
# Driver Airbag
Filled_DataFrame=pd.DataFrame()
main["Driver Airbag"].isnull().sum()
Out[154]:
2930
In [155]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Driver Airbag"),axis=1)
In [156]:
main["Driver Airbag"]=Filled_DataFrame["Driver Airbag"]
In [157]:
main["Wheel Covers"].isnull().sum()
Out[157]:
0
In [158]:
# Air Conditioner
Filled_DataFrame=pd.DataFrame()
main["Air Conditioner"].isnull().sum()
Out[158]:
2235
In [159]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Air Conditioner"),axis=1)
In [160]:
main["Air Conditioner"]=Filled_DataFrame["Air Conditioner"]
In [161]:
# Power Adjustable Exterior Rear View Mirror
Filled_DataFrame=pd.DataFrame()
main["Power Adjustable Exterior Rear View Mirror"].isnull().sum()
Out[161]:
3260
In [162]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Power Adjustable Exterior Rear View Mirror"),axis=1)
In [163]:
main["Power Adjustable Exterior Rear View Mirror"]=Filled_DataFrame["Power Adjustable Exterior Rear View Mirror"]
In [164]:
# Automatic Climate Control
Filled_DataFrame=pd.DataFrame()
main["Automatic Climate Control"].isnull().sum()
Out[164]:
10347
In [165]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Automatic Climate Control"),axis=1)
In [166]:
main["Automatic Climate Control"]=Filled_DataFrame["Automatic Climate Control"]
In [167]:
# Anti Lock Braking System
Filled_DataFrame=pd.DataFrame()
main["Anti Lock Braking System"].isnull().sum()
Out[167]:
3592
In [168]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Anti Lock Braking System"),axis=1)
In [169]:
main["Anti Lock Braking System"]=Filled_DataFrame["Anti Lock Braking System"]
In [170]:
# Fog Lights - Front
Filled_DataFrame=pd.DataFrame()
main["Fog Lights - Front"].isnull().sum()
Out[170]:
17867
In [171]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Fog Lights - Front"),axis=1)
In [172]:
main["Fog Lights - Front"]=Filled_DataFrame["Fog Lights - Front"]
In [173]:
# Power Windows Front
Filled_DataFrame=pd.DataFrame()
main["Power Windows Front"].isnull().sum()
Out[173]:
3336
In [174]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Power Windows Front"),axis=1)
In [175]:
main["Power Windows Front"]=Filled_DataFrame["Power Windows Front"]
In [176]:
# Passenger Airbag
Filled_DataFrame=pd.DataFrame()
main["Passenger Airbag"].isnull().sum()
Out[176]:
1574
In [177]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Passenger Airbag"),axis=1)
In [178]:
main["Passenger Airbag"]=Filled_DataFrame["Passenger Airbag"]
In [179]:
# Power Steering
Filled_DataFrame=pd.DataFrame()
main["Power Steering"].isnull().sum()
Out[179]:
8957
In [180]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Power Steering"),axis=1)
In [181]:
main["Power Steering"]=Filled_DataFrame["Power Steering"]
In [182]:
# Engine Type
Filled_DataFrame=pd.DataFrame()
main["Engine Type"].isnull().sum()
Out[182]:
5964
In [183]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Engine Type"),axis=1)
main["Engine Type"]=Filled_DataFrame["Engine Type"]
In [184]:
# Emission Norm Compliance
Filled_DataFrame=pd.DataFrame()
main["Emission Norm Compliance"].isnull().sum()
Out[184]:
2344
In [185]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Emission Norm Compliance"),axis=1)
main["Emission Norm Compliance"]=Filled_DataFrame["Emission Norm Compliance"]
In [186]:
# Front Suspension
Filled_DataFrame=pd.DataFrame()
main["Front Suspension"].isnull().sum()
Out[186]:
2656
In [187]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Front Suspension"),axis=1)
main["Front Suspension"]=Filled_DataFrame["Front Suspension"]
In [188]:
# Rear Suspension
Filled_DataFrame=pd.DataFrame()
main["Rear Suspension"].isnull().sum()
Out[188]:
3030
In [189]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Rear Suspension"),axis=1)
main["Rear Suspension"]=Filled_DataFrame["Rear Suspension"]
In [190]:
# Steering Type
Filled_DataFrame=pd.DataFrame()
main["Steering Type"].isnull().sum()
Out[190]:
14349
In [191]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Steering Type"),axis=1)
main["Steering Type"]=Filled_DataFrame["Steering Type"]
In [192]:
# Steering Column
Filled_DataFrame=pd.DataFrame()
main["Steering Column"].isnull().sum()
Out[192]:
14258
In [193]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Steering Column"),axis=1)
main["Steering Column"]=Filled_DataFrame["Steering Column"]
In [194]:
# Front Brake Type
Filled_DataFrame=pd.DataFrame()
main["Front Brake Type"].isnull().sum()
Out[194]:
2693
In [195]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Front Brake Type"),axis=1)
main["Front Brake Type"]=Filled_DataFrame["Front Brake Type"]
In [196]:
# Rear Brake Type
Filled_DataFrame=pd.DataFrame()
main["Rear Brake Type"].isnull().sum()
Out[196]:
5010
In [197]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Rear Brake Type"),axis=1)
main["Rear Brake Type"]=Filled_DataFrame["Rear Brake Type"]
In [202]:
# Heater
Filled_DataFrame=pd.DataFrame()
main["Heater"].isnull().sum()
Out[202]:
4065
In [203]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Heater"),axis=1)
main["Heater"]=Filled_DataFrame["Heater"]
In [204]:
# Adjustable Steering
Filled_DataFrame=pd.DataFrame()
main["Adjustable Steering"].isnull().sum()
Out[204]:
12822
In [205]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Adjustable Steering"),axis=1)
main["Adjustable Steering"]=Filled_DataFrame["Adjustable Steering"]
In [206]:
# Low Fuel Warning Light
Filled_DataFrame=pd.DataFrame()
main["Low Fuel Warning Light"].isnull().sum()
Out[206]:
4155
In [207]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Low Fuel Warning Light"),axis=1)
main["Low Fuel Warning Light"]=Filled_DataFrame["Low Fuel Warning Light"]
In [208]:
# Accessory Power Outlet
Filled_DataFrame=pd.DataFrame()
main["Accessory Power Outlet"].isnull().sum()
Out[208]:
2813
In [209]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Accessory Power Outlet"),axis=1)
main["Accessory Power Outlet"]=Filled_DataFrame["Accessory Power Outlet"]
In [210]:
# Trunk Light
Filled_DataFrame=pd.DataFrame()
main["Trunk Light"].isnull().sum()
Out[210]:
43805
In [211]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Trunk Light"),axis=1)
main["Trunk Light"]=Filled_DataFrame["Trunk Light"]
In [212]:
# Rear Seat Headrest
Filled_DataFrame=pd.DataFrame()
main["Rear Seat Headrest"].isnull().sum()
Out[212]:
5122
In [213]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Rear Seat Headrest"),axis=1)
main["Rear Seat Headrest"]=Filled_DataFrame["Rear Seat Headrest"]
In [214]:
# Adjustable Headrest
Filled_DataFrame=pd.DataFrame()
main["Adjustable Headrest"].isnull().sum()
Out[214]:
25085
In [215]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Adjustable Headrest"),axis=1)
main["Adjustable Headrest"]=Filled_DataFrame["Adjustable Headrest"]
In [216]:
# Rear Seat Centre Arm Rest
Filled_DataFrame=pd.DataFrame()
main["Rear Seat Centre Arm Rest"].isnull().sum()
Out[216]:
21040
In [217]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Rear Seat Centre Arm Rest"),axis=1)
main["Rear Seat Centre Arm Rest"]=Filled_DataFrame["Rear Seat Centre Arm Rest"]
In [218]:
# Height Adjustable Front Seat Belts
Filled_DataFrame=pd.DataFrame()
main["Height Adjustable Front Seat Belts"].isnull().sum()
Out[218]:
46851
In [219]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Height Adjustable Front Seat Belts"),axis=1)
main["Height Adjustable Front Seat Belts"]=Filled_DataFrame["Height Adjustable Front Seat Belts"]
In [220]:
# Cup Holders-Rear
Filled_DataFrame=pd.DataFrame()
main["Cup Holders-Rear"].isnull().sum()
Out[220]:
28202
In [221]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Cup Holders-Rear"),axis=1)
main["Cup Holders-Rear"]=Filled_DataFrame["Cup Holders-Rear"]
In [222]:
# Rear AC Vents
Filled_DataFrame=pd.DataFrame()
main["Rear AC Vents"].isnull().sum()
Out[222]:
20459
In [223]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Rear AC Vents"),axis=1)
main["Rear AC Vents"]=Filled_DataFrame["Rear AC Vents"]
In [224]:
# Seat Lumbar Support
Filled_DataFrame=pd.DataFrame()
main["Seat Lumbar Support"].isnull().sum()
Out[224]:
25211
In [225]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Seat Lumbar Support"),axis=1)
main["Seat Lumbar Support"]=Filled_DataFrame["Seat Lumbar Support"]
In [226]:
# Cruise Control
Filled_DataFrame=pd.DataFrame()
main["Cruise Control"].isnull().sum()
Out[226]:
14583
In [227]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Cruise Control"),axis=1)
main["Cruise Control"]=Filled_DataFrame["Cruise Control"]
In [228]:
# Cruise Control
Filled_DataFrame=pd.DataFrame()
main["Smart Access Card Entry"].isnull().sum()
Out[228]:
22685
In [229]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Smart Access Card Entry"),axis=1)
main["Smart Access Card Entry"]=Filled_DataFrame["Smart Access Card Entry"]
In [230]:
# KeyLess Entry
Filled_DataFrame=pd.DataFrame()
main["KeyLess Entry"].isnull().sum()
Out[230]:
2584
In [231]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"KeyLess Entry"),axis=1)
main["KeyLess Entry"]=Filled_DataFrame["KeyLess Entry"]
In [232]:
# Engine Start/Stop Button
Filled_DataFrame=pd.DataFrame()
main["Engine Start/Stop Button"].isnull().sum()
Out[232]:
11799
In [233]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Engine Start/Stop Button"),axis=1)
main["Engine Start/Stop Button"]=Filled_DataFrame["Engine Start/Stop Button"]
In [234]:
# Glove Box Cooling
Filled_DataFrame=pd.DataFrame()
main["Glove Box Cooling"].isnull().sum()
Out[234]:
37695
In [235]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Glove Box Cooling"),axis=1)
main["Glove Box Cooling"]=Filled_DataFrame["Glove Box Cooling"]
In [236]:
# Voice Control
Filled_DataFrame=pd.DataFrame()
main["Voice Control"].isnull().sum()
Out[236]:
27901
In [237]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Voice Control"),axis=1)
main["Voice Control"]=Filled_DataFrame["Voice Control"]
In [238]:
# Gear Shift Indicator
Filled_DataFrame=pd.DataFrame()
main["Gear Shift Indicator"].isnull().sum()
Out[238]:
52908
In [239]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Gear Shift Indicator"),axis=1)
main["Gear Shift Indicator"]=Filled_DataFrame["Gear Shift Indicator"]
In [240]:
# Tachometer
Filled_DataFrame=pd.DataFrame()
main["Tachometer"].isnull().sum()
Out[240]:
14511
In [241]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Tachometer"),axis=1)
main["Tachometer"]=Filled_DataFrame["Tachometer"]
In [242]:
# Electronic Multi-Tripmeter
Filled_DataFrame=pd.DataFrame()
main["Electronic Multi-Tripmeter"].isnull().sum()
Out[242]:
5336
In [243]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Electronic Multi-Tripmeter"),axis=1)
main["Electronic Multi-Tripmeter"]=Filled_DataFrame["Electronic Multi-Tripmeter"]
In [244]:
# Fabric Upholstery
Filled_DataFrame=pd.DataFrame()
main["Fabric Upholstery"].isnull().sum()
Out[244]:
10497
In [245]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Fabric Upholstery"),axis=1)
main["Fabric Upholstery"]=Filled_DataFrame["Fabric Upholstery"]
In [246]:
# Leather Steering Wheel
Filled_DataFrame=pd.DataFrame()
main["Leather Steering Wheel"].isnull().sum()
Out[246]:
16480
In [247]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Leather Steering Wheel"),axis=1)
main["Leather Steering Wheel"]=Filled_DataFrame["Leather Steering Wheel"]
In [248]:
# Glove Compartment
Filled_DataFrame=pd.DataFrame()
main["Glove Compartment"].isnull().sum()
Out[248]:
1627
In [249]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Glove Compartment"),axis=1)
main["Glove Compartment"]=Filled_DataFrame["Glove Compartment"]
In [250]:
# Digital Clock
Filled_DataFrame=pd.DataFrame()
main["Digital Clock"].isnull().sum()
Out[250]:
5408
In [251]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Digital Clock"),axis=1)
main["Digital Clock"]=Filled_DataFrame["Digital Clock"]
In [252]:
# Digital Odometer
Filled_DataFrame=pd.DataFrame()
main["Digital Odometer"].isnull().sum()
Out[252]:
14334
In [253]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Digital Odometer"),axis=1)
main["Digital Odometer"]=Filled_DataFrame["Digital Odometer"]
In [254]:
# Height Adjustable Driver Seat
Filled_DataFrame=pd.DataFrame()
main["Height Adjustable Driver Seat"].isnull().sum()
Out[254]:
14696
In [255]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Height Adjustable Driver Seat"),axis=1)
main["Height Adjustable Driver Seat"]=Filled_DataFrame["Height Adjustable Driver Seat"]
In [256]:
# Dual Tone Dashboard
Filled_DataFrame=pd.DataFrame()
main["Dual Tone Dashboard"].isnull().sum()
Out[256]:
27826
In [257]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Dual Tone Dashboard"),axis=1)
main["Dual Tone Dashboard"]=Filled_DataFrame["Dual Tone Dashboard"]
In [258]:
# Adjustable Headlights
Filled_DataFrame=pd.DataFrame()
main["Adjustable Headlights"].isnull().sum()
Out[258]:
1802
In [259]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Adjustable Headlights"),axis=1)
main["Adjustable Headlights"]=Filled_DataFrame["Adjustable Headlights"]
In [260]:
# Electric Folding Rear View Mirror
Filled_DataFrame=pd.DataFrame()
main["Electric Folding Rear View Mirror"].isnull().sum()
Out[260]:
7675
In [261]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Electric Folding Rear View Mirror"),axis=1)
main["Electric Folding Rear View Mirror"]=Filled_DataFrame["Electric Folding Rear View Mirror"]
In [262]:
# Rear Window Wiper
Filled_DataFrame=pd.DataFrame()
main["Rear Window Wiper"].isnull().sum()
Out[262]:
23379
In [263]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Rear Window Wiper"),axis=1)
main["Rear Window Wiper"]=Filled_DataFrame["Rear Window Wiper"]
In [264]:
# Rear Window Defogger
Filled_DataFrame=pd.DataFrame()
main["Rear Window Defogger"].isnull().sum()
Out[264]:
12448
In [265]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Rear Window Defogger"),axis=1)
main["Rear Window Defogger"]=Filled_DataFrame["Rear Window Defogger"]
In [266]:
# Rear Window Defogger
Filled_DataFrame=pd.DataFrame()
main["Rear Spoiler"].isnull().sum()
Out[266]:
31738
In [267]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Rear Spoiler"),axis=1)
main["Rear Spoiler"]=Filled_DataFrame["Rear Spoiler"]
In [268]:
# Sun Roof
Filled_DataFrame=pd.DataFrame()
main["Sun Roof"].isnull().sum()
Out[268]:
30497
In [269]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Sun Roof"),axis=1)
main["Sun Roof"]=Filled_DataFrame["Sun Roof"]
In [270]:
# Moon Roof
Filled_DataFrame=pd.DataFrame()
main["Moon Roof"].isnull().sum()
Out[270]:
31052
In [271]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Moon Roof"),axis=1)
main["Moon Roof"]=Filled_DataFrame["Moon Roof"]
In [272]:
# Outside Rear View Mirror Turn Indicators
Filled_DataFrame=pd.DataFrame()
main["Outside Rear View Mirror Turn Indicators"].isnull().sum()
Out[272]:
14934
In [273]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Outside Rear View Mirror Turn Indicators"),axis=1)
main["Outside Rear View Mirror Turn Indicators"]=Filled_DataFrame["Outside Rear View Mirror Turn Indicators"]
In [274]:
# Intergrated Antenna
Filled_DataFrame=pd.DataFrame()
main["Intergrated Antenna"].isnull().sum()
Out[274]:
24501
In [275]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Intergrated Antenna"),axis=1)
main["Intergrated Antenna"]=Filled_DataFrame["Intergrated Antenna"]
In [276]:
# Intergrated Antenna
Filled_DataFrame=pd.DataFrame()
main["Chrome Grille"].isnull().sum()
Out[276]:
33710
In [277]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Chrome Grille"),axis=1)
main["Chrome Grille"]=Filled_DataFrame["Chrome Grille"]
In [278]:
# Halogen Headlamps
Filled_DataFrame=pd.DataFrame()
main["Halogen Headlamps"].isnull().sum()
Out[278]:
39907
In [279]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Halogen Headlamps"),axis=1)
main["Halogen Headlamps"]=Filled_DataFrame["Halogen Headlamps"]
In [280]:
# Roof Rail
Filled_DataFrame=pd.DataFrame()
main["Roof Rail"].isnull().sum()
Out[280]:
36422
In [281]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Roof Rail"),axis=1)
main["Roof Rail"]=Filled_DataFrame["Roof Rail"]
In [282]:
# LED DRLs
Filled_DataFrame=pd.DataFrame()
main["LED DRLs"].isnull().sum()
Out[282]:
26018
In [283]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"LED DRLs"),axis=1)
main["LED DRLs"]=Filled_DataFrame["LED DRLs"]
In [284]:
# LED Taillights
Filled_DataFrame=pd.DataFrame()
main["LED Taillights"].isnull().sum()
Out[284]:
37530
In [285]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"LED Taillights"),axis=1)
main["LED Taillights"]=Filled_DataFrame["LED Taillights"]
In [286]:
# Anti-Lock Braking System
Filled_DataFrame=pd.DataFrame()
main["Anti-Lock Braking System"].isnull().sum()
Out[286]:
2045
In [287]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Anti-Lock Braking System"),axis=1)
main["Anti-Lock Braking System"]=Filled_DataFrame["Anti-Lock Braking System"]
In [288]:
# Central Locking
Filled_DataFrame=pd.DataFrame()
main["Central Locking"].isnull().sum()
Out[288]:
2651
In [289]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Central Locking"),axis=1)
main["Central Locking"]=Filled_DataFrame["Central Locking"]
In [290]:
# Power Door Locks
Filled_DataFrame=pd.DataFrame()
main["Power Door Locks"].isnull().sum()
Out[290]:
3476
In [291]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Power Door Locks"),axis=1)
main["Power Door Locks"]=Filled_DataFrame["Power Door Locks"]
In [292]:
# Child Safety Locks
Filled_DataFrame=pd.DataFrame()
main["Child Safety Locks"].isnull().sum()
Out[292]:
6924
In [293]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Child Safety Locks"),axis=1)
main["Child Safety Locks"]=Filled_DataFrame["Child Safety Locks"]
In [294]:
# Side Airbag-Front
Filled_DataFrame=pd.DataFrame()
main["Side Airbag-Front"].isnull().sum()
Out[294]:
32139
In [295]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Side Airbag-Front"),axis=1)
main["Side Airbag-Front"]=Filled_DataFrame["Side Airbag-Front"]
In [296]:
# Day & Night Rear View Mirror
Filled_DataFrame=pd.DataFrame()
main["Day & Night Rear View Mirror"].isnull().sum()
Out[296]:
37003
In [297]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Day & Night Rear View Mirror"),axis=1)
main["Day & Night Rear View Mirror"]=Filled_DataFrame["Day & Night Rear View Mirror"]
In [298]:
# Passenger Side Rear View Mirror
Filled_DataFrame=pd.DataFrame()
main["Passenger Side Rear View Mirror"].isnull().sum()
Out[298]:
2302
In [299]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Passenger Side Rear View Mirror"),axis=1)
main["Passenger Side Rear View Mirror"]=Filled_DataFrame["Passenger Side Rear View Mirror"]
In [300]:
# Rear Seat Belts
Filled_DataFrame=pd.DataFrame()
main["Rear Seat Belts"].isnull().sum()
Out[300]:
6081
In [301]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Rear Seat Belts"),axis=1)
main["Rear Seat Belts"]=Filled_DataFrame["Rear Seat Belts"]
In [302]:
# Seat Belt Warning
Filled_DataFrame=pd.DataFrame()
main["Seat Belt Warning"].isnull().sum()
Out[302]:
15626
In [303]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Seat Belt Warning"),axis=1)
main["Seat Belt Warning"]=Filled_DataFrame["Seat Belt Warning"]
In [304]:
# Door Ajar Warning
Filled_DataFrame=pd.DataFrame()
main["Door Ajar Warning"].isnull().sum()
Out[304]:
37983
In [305]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Door Ajar Warning"),axis=1)
main["Door Ajar Warning"]=Filled_DataFrame["Door Ajar Warning"]
In [306]:
# Adjustable Seats
Filled_DataFrame=pd.DataFrame()
main["Adjustable Seats"].isnull().sum()
Out[306]:
1598
In [307]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Adjustable Seats"),axis=1)
main["Adjustable Seats"]=Filled_DataFrame["Adjustable Seats"]
In [308]:
# Engine Immobilizer
Filled_DataFrame=pd.DataFrame()
main["Engine Immobilizer"].isnull().sum()
Out[308]:
32910
In [309]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Engine Immobilizer"),axis=1)
main["Engine Immobilizer"]=Filled_DataFrame["Engine Immobilizer"]
In [310]:
# Crash Sensor
Filled_DataFrame=pd.DataFrame()
main["Crash Sensor"].isnull().sum()
Out[310]:
3307
In [311]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Crash Sensor"),axis=1)
main["Crash Sensor"]=Filled_DataFrame["Crash Sensor"]
In [312]:
# Engine Check Warning
Filled_DataFrame=pd.DataFrame()
main["Engine Check Warning"].isnull().sum()
Out[312]:
10614
In [313]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Engine Check Warning"),axis=1)
main["Engine Check Warning"]=Filled_DataFrame["Engine Check Warning"]
In [314]:
# Automatic Headlamps
Filled_DataFrame=pd.DataFrame()
main["Automatic Headlamps"].isnull().sum()
Out[314]:
21576
In [315]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Automatic Headlamps"),axis=1)
main["Automatic Headlamps"]=Filled_DataFrame["Automatic Headlamps"]
In [316]:
# EBD
Filled_DataFrame=pd.DataFrame()
main["EBD"].isnull().sum()
Out[316]:
4208
In [317]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"EBD"),axis=1)
main["EBD"]=Filled_DataFrame["EBD"]
In [318]:
# Electronic Stability Control
Filled_DataFrame=pd.DataFrame()
main["Electronic Stability Control"].isnull().sum()
Out[318]:
41817
In [319]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Electronic Stability Control"),axis=1)
main["Electronic Stability Control"]=Filled_DataFrame["Electronic Stability Control"]
In [320]:
# Rear Camera
Filled_DataFrame=pd.DataFrame()
main["Rear Camera"].isnull().sum()
Out[320]:
11678
In [321]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Rear Camera"),axis=1)
main["Rear Camera"]=Filled_DataFrame["Rear Camera"]
In [322]:
# ISOFIX Child Seat Mounts
Filled_DataFrame=pd.DataFrame()
main["ISOFIX Child Seat Mounts"].isnull().sum()
Out[322]:
22738
In [323]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"ISOFIX Child Seat Mounts"),axis=1)
main["ISOFIX Child Seat Mounts"]=Filled_DataFrame["ISOFIX Child Seat Mounts"]
In [324]:
# Pretensioners & Force Limiter Seatbelts
Filled_DataFrame=pd.DataFrame()
main["Pretensioners & Force Limiter Seatbelts"].isnull().sum()
Out[324]:
32582
In [325]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Pretensioners & Force Limiter Seatbelts"),axis=1)
main["Pretensioners & Force Limiter Seatbelts"]=Filled_DataFrame["Pretensioners & Force Limiter Seatbelts"]
In [326]:
# Hill Assist
Filled_DataFrame=pd.DataFrame()
main["Hill Assist"].isnull().sum()
Out[326]:
30140
In [327]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Hill Assist"),axis=1)
main["Hill Assist"]=Filled_DataFrame["Hill Assist"]
In [328]:
# Radio
Filled_DataFrame=pd.DataFrame()
main["Radio"].isnull().sum()
Out[328]:
2410
In [329]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Radio"),axis=1)
main["Radio"]=Filled_DataFrame["Radio"]
In [330]:
# Audio System Remote Control
Filled_DataFrame=pd.DataFrame()
main["Audio System Remote Control"].isnull().sum()
Out[330]:
64272
In [331]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Audio System Remote Control"),axis=1)
main["Audio System Remote Control"]=Filled_DataFrame["Audio System Remote Control"]
In [332]:
# Speakers Front
Filled_DataFrame=pd.DataFrame()
main["Speakers Front"].isnull().sum()
Out[332]:
2448
In [333]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Speakers Front"),axis=1)
main["Speakers Front"]=Filled_DataFrame["Speakers Front"]
In [334]:
# Speakers Rear
Filled_DataFrame=pd.DataFrame()
main["Speakers Rear"].isnull().sum()
Out[334]:
2389
In [335]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Speakers Rear"),axis=1)
main["Speakers Rear"]=Filled_DataFrame["Speakers Rear"]
In [336]:
# Integrated 2DIN Audio
Filled_DataFrame=pd.DataFrame()
main["Integrated 2DIN Audio"].isnull().sum()
Out[336]:
6788
In [337]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Integrated 2DIN Audio"),axis=1)
main["Integrated 2DIN Audio"]=Filled_DataFrame["Integrated 2DIN Audio"]
In [338]:
# USB & Auxiliary input
Filled_DataFrame=pd.DataFrame()
main["USB & Auxiliary input"].isnull().sum()
Out[338]:
37024
In [339]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"USB & Auxiliary input"),axis=1)
main["USB & Auxiliary input"]=Filled_DataFrame["USB & Auxiliary input"]
In [340]:
# Bluetooth Connectivity
Filled_DataFrame=pd.DataFrame()
main["Bluetooth Connectivity"].isnull().sum()
Out[340]:
5488
In [341]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Bluetooth Connectivity"),axis=1)
main["Bluetooth Connectivity"]=Filled_DataFrame["Bluetooth Connectivity"]
In [342]:
# Android Auto
Filled_DataFrame=pd.DataFrame()
main["Android Auto"].isnull().sum()
Out[342]:
23316
In [343]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Android Auto"),axis=1)
main["Android Auto"]=Filled_DataFrame["Android Auto"]
In [344]:
# Apple CarPlay
Filled_DataFrame=pd.DataFrame()
main["Apple CarPlay"].isnull().sum()
Out[344]:
23881
In [345]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Apple CarPlay"),axis=1)
main["Apple CarPlay"]=Filled_DataFrame["Apple CarPlay"]
In [346]:
# Turning Radius (Metres)
Filled_DataFrame=pd.DataFrame()
main["Turning Radius (Metres)"].isnull().sum()
Out[346]:
56190
In [347]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Turning Radius (Metres)"),axis=1)
main["Turning Radius (Metres)"]=Filled_DataFrame["Turning Radius (Metres)"]
In [348]:
# Vanity Mirror
Filled_DataFrame=pd.DataFrame()
main["Vanity Mirror"].isnull().sum()
Out[348]:
37298
In [349]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Vanity Mirror"),axis=1)
main["Vanity Mirror"]=Filled_DataFrame["Vanity Mirror"]
In [350]:
# Navigation System
Filled_DataFrame=pd.DataFrame()
main["Navigation System"].isnull().sum()
Out[350]:
35966
In [351]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Navigation System"),axis=1)
main["Navigation System"]=Filled_DataFrame["Navigation System"]
In [352]:
# Outside Temperature Display
Filled_DataFrame=pd.DataFrame()
main["Outside Temperature Display"].isnull().sum()
Out[352]:
61521
In [353]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Outside Temperature Display"),axis=1)
main["Outside Temperature Display"]=Filled_DataFrame["Outside Temperature Display"]
In [354]:
# Manually Adjustable Ext. Rear View Mirror
Filled_DataFrame=pd.DataFrame()
main["Manually Adjustable Ext. Rear View Mirror"].isnull().sum()
Out[354]:
24027
In [355]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Manually Adjustable Ext. Rear View Mirror"),axis=1)
main["Manually Adjustable Ext. Rear View Mirror"]=Filled_DataFrame["Manually Adjustable Ext. Rear View Mirror"]
In [356]:
# Power Antenna
Filled_DataFrame=pd.DataFrame()
main["Power Antenna"].isnull().sum()
Out[356]:
27849
In [357]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Power Antenna"),axis=1)
main["Power Antenna"]=Filled_DataFrame["Power Antenna"]
In [358]:
# Brake Assist
Filled_DataFrame=pd.DataFrame()
main["Brake Assist"].isnull().sum()
Out[358]:
45067
In [359]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Brake Assist"),axis=1)
main["Brake Assist"]=Filled_DataFrame["Brake Assist"]
In [360]:
# Anti-Theft Alarm
Filled_DataFrame=pd.DataFrame()
main["Anti-Theft Alarm"].isnull().sum()
Out[360]:
48915
In [361]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Anti-Theft Alarm"),axis=1)
main["Anti-Theft Alarm"]=Filled_DataFrame["Anti-Theft Alarm"]
In [362]:
# Speed Sensing Auto Door Lock
Filled_DataFrame=pd.DataFrame()
main["Speed Sensing Auto Door Lock"].isnull().sum()
Out[362]:
31430
In [363]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Speed Sensing Auto Door Lock"),axis=1)
main["Speed Sensing Auto Door Lock"]=Filled_DataFrame["Speed Sensing Auto Door Lock"]
In [364]:
# Chrome Garnish
Filled_DataFrame=pd.DataFrame()
main["Chrome Garnish"].isnull().sum()
Out[364]:
47823
In [365]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Chrome Garnish"),axis=1)
main["Chrome Garnish"]=Filled_DataFrame["Chrome Garnish"]
In [366]:
# Side Impact Beams
Filled_DataFrame=pd.DataFrame()
main["Side Impact Beams"].isnull().sum()
Out[366]:
62890
In [367]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Side Impact Beams"),axis=1)
main["Side Impact Beams"]=Filled_DataFrame["Side Impact Beams"]
In [368]:
# Drive Type
Filled_DataFrame=pd.DataFrame()
main["Drive Type"].isnull().sum()
Out[368]:
53327
In [369]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Drive Type"),axis=1)
main["Drive Type"]=Filled_DataFrame["Drive Type"]
In [370]:
# Rear Reading Lamp
Filled_DataFrame=pd.DataFrame()
main["Rear Reading Lamp"].isnull().sum()
Out[370]:
42496
In [371]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Rear Reading Lamp"),axis=1)
main["Rear Reading Lamp"]=Filled_DataFrame["Rear Reading Lamp"]
In [372]:
# Cup Holders-Front
Filled_DataFrame=pd.DataFrame()
main["Cup Holders-Front"].isnull().sum()
Out[372]:
44164
In [373]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Cup Holders-Front"),axis=1)
main["Cup Holders-Front"]=Filled_DataFrame["Cup Holders-Front"]
In [374]:
# Leather Seats
Filled_DataFrame=pd.DataFrame()
main["Leather Seats"].isnull().sum()
Out[374]:
29188
In [375]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Leather Seats"),axis=1)
main["Leather Seats"]=Filled_DataFrame["Leather Seats"]
In [376]:
# Driving Experience Control Eco
Filled_DataFrame=pd.DataFrame()
main["Driving Experience Control Eco"].isnull().sum()
Out[376]:
44979
In [377]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Driving Experience Control Eco"),axis=1)
main["Driving Experience Control Eco"]=Filled_DataFrame["Driving Experience Control Eco"]
In [378]:
# Ventilated Seats
Filled_DataFrame=pd.DataFrame()
main["Ventilated Seats"].isnull().sum()
Out[378]:
47550
In [379]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Ventilated Seats"),axis=1)
main["Ventilated Seats"]=Filled_DataFrame["Ventilated Seats"]
In [380]:
# Tyre Pressure Monitor
Filled_DataFrame=pd.DataFrame()
main["Tyre Pressure Monitor"].isnull().sum()
Out[380]:
31789
In [381]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Tyre Pressure Monitor"),axis=1)
main["Tyre Pressure Monitor"]=Filled_DataFrame["Tyre Pressure Monitor"]
In [382]:
# Rain Sensing Wiper
Filled_DataFrame=pd.DataFrame()
main["Rain Sensing Wiper"].isnull().sum()
Out[382]:
36739
In [383]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Rain Sensing Wiper"),axis=1)
main["Rain Sensing Wiper"]=Filled_DataFrame["Rain Sensing Wiper"]
In [384]:
# Turbo Charger
Filled_DataFrame=pd.DataFrame()
main["Turbo Charger"].isnull().sum()
Out[384]:
30909
In [385]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Turbo Charger"),axis=1)
main["Turbo Charger"]=Filled_DataFrame["Turbo Charger"]
In [386]:
# Air Quality Control
Filled_DataFrame=pd.DataFrame()
main["Air Quality Control"].isnull().sum()
Out[386]:
53313
In [387]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Air Quality Control"),axis=1)
main["Air Quality Control"]=Filled_DataFrame["Air Quality Control"]
In [388]:
# Traction Control
Filled_DataFrame=pd.DataFrame()
main["Traction Control"].isnull().sum()
Out[388]:
44622
In [389]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Traction Control"),axis=1)
main["Traction Control"]=Filled_DataFrame["Traction Control"]
In [390]:
# Vehicle Stability Control System
Filled_DataFrame=pd.DataFrame()
main["Vehicle Stability Control System"].isnull().sum()
Out[390]:
56095
In [391]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Vehicle Stability Control System"),axis=1)
main["Vehicle Stability Control System"]=Filled_DataFrame["Vehicle Stability Control System"]
In [392]:
# Rear Window Washer
Filled_DataFrame=pd.DataFrame()
main["Rear Window Washer"].isnull().sum()
Out[392]:
26432
In [393]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Rear Window Washer"),axis=1)
In [394]:
main["Rear Window Washer"]=Filled_DataFrame["Rear Window Washer"]
In [395]:
# Rear Window Washer
Filled_DataFrame=pd.DataFrame()
main["Rear Window Washer"].isnull().sum()
Out[395]:
0
In [396]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Rear Window Washer"),axis=1)
In [392]:
# Gear Box
Filled_DataFrame=pd.DataFrame()
main["Gear Box"].isnull().sum()
Out[392]:
26432
In [396]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Gear Box"),axis=1)
In [397]:
main["Gear Box"]=Filled_DataFrame["Gear Box"]
In [398]:
#Follow Me Home Headlamps
Filled_DataFrame=pd.DataFrame()
main["Follow Me Home Headlamps"].isnull().sum()
Out[398]:
29197
In [399]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Follow Me Home Headlamps"),axis=1)
In [400]:
main["Follow Me Home Headlamps"]=Filled_DataFrame["Follow Me Home Headlamps"]
In [401]:
main.columns
Out[401]:
Index(['Unnamed: 0', 'Model', 'Brand', 'Varient', 'ARAI Mileage(Km/L)',
        'Engine Displacement (cc)', 'Seating Capacity', 'Boot Space (Litres)',
        'Body Type', 'Fuel Type',
        ...
        'Tyre Pressure Monitor', 'Rain Sensing Wiper', 'Turbo Charger',
        'Air Quality Control', 'Traction Control',
        'Vehicle Stability Control System', 'BHP', 'RPM', 'NM', 'NM_RPM'],
      dtype='object', length=148)
In [402]:
main.isnull().sum()
Out[402]:
Unnamed: 0                          0
Model                               0
Brand                               0
Varient                             0
ARAI Mileage(Km/L)                  0
                                    ..
Vehicle Stability Control System    0
BHP                                 0
RPM                                 0
NM                                  0
NM_RPM                              0
Length: 148, dtype: int64
In [6]:
pd.set_option("display.max_rows",None)
pd.DataFrame(main.isnull().any())
Out[6]:
0
Unnamed: 0 False
Unnamed: 0.1 False
Model False
Brand False
Varient False
ARAI Mileage(Km/L) False
Engine Displacement (cc) False
Seating Capacity False
Boot Space (Litres) False
Body Type False
Fuel Type False
No. of cylinder False
TransmissionType False
Fuel Tank Capacity False
Multi-function Steering Wheel False
Touch Screen False
Engine Start Stop Button False
Alloy Wheels False
Power Windows Rear False
Wheel Covers False
Driver Airbag False
Air Conditioner False
Power Adjustable Exterior Rear View Mirror False
Automatic Climate Control False
Anti Lock Braking System False
Fog Lights - Front False
Power Windows Front False
Passenger Airbag False
Power Steering False
Engine Type False
Gear Box False
Emission Norm Compliance False
Front Suspension False
Rear Suspension False
Steering Type False
Steering Column False
Front Brake Type False
Rear Brake Type False
Length (mm) False
Width (mm) False
Height (mm) False
Heater False
Adjustable Steering False
Low Fuel Warning Light False
Accessory Power Outlet False
Trunk Light False
Rear Seat Headrest False
Adjustable Headrest False
Rear Seat Centre Arm Rest False
Height Adjustable Front Seat Belts False
Cup Holders-Rear False
Rear AC Vents False
Seat Lumbar Support False
Cruise Control False
Smart Access Card Entry False
KeyLess Entry False
Engine Start/Stop Button False
Glove Box Cooling False
Voice Control False
Gear Shift Indicator False
Tachometer False
Electronic Multi-Tripmeter False
Fabric Upholstery False
Leather Steering Wheel False
Glove Compartment False
Digital Clock False
Digital Odometer False
Height Adjustable Driver Seat False
Dual Tone Dashboard False
Adjustable Headlights False
Electric Folding Rear View Mirror False
Rear Window Wiper False
Rear Window Washer False
Rear Window Defogger False
Rear Spoiler False
Sun Roof False
Moon Roof False
Outside Rear View Mirror Turn Indicators False
Intergrated Antenna False
Chrome Grille False
Halogen Headlamps False
Roof Rail False
LED DRLs False
LED Taillights False
Anti-Lock Braking System False
Central Locking False
Power Door Locks False
Child Safety Locks False
Side Airbag-Front False
Day & Night Rear View Mirror False
Passenger Side Rear View Mirror False
Rear Seat Belts False
Seat Belt Warning False
Door Ajar Warning False
Adjustable Seats False
Engine Immobilizer False
Crash Sensor False
Engine Check Warning False
Automatic Headlamps False
EBD False
Electronic Stability Control False
Follow Me Home Headlamps False
Rear Camera False
ISOFIX Child Seat Mounts False
Pretensioners & Force Limiter Seatbelts False
Hill Assist False
Radio False
Audio System Remote Control False
Speakers Front False
Speakers Rear False
Integrated 2DIN Audio False
USB & Auxiliary input False
Bluetooth Connectivity False
Android Auto False
Apple CarPlay False
Place False
option False
Price False
City Mileage(Km/L) False
Turning Radius (Metres) False
Vanity Mirror False
Navigation System False
Outside Temperature Display False
Manually Adjustable Ext. Rear View Mirror False
Power Antenna False
Brake Assist False
Anti-Theft Alarm False
Speed Sensing Auto Door Lock False
Chrome Garnish False
Side Impact Beams False
Drive Type False
Rear Reading Lamp False
Cup Holders-Front False
Leather Seats False
Driving Experience Control Eco False
Ventilated Seats False
Tyre Pressure Monitor False
Rain Sensing Wiper False
Turbo Charger False
Air Quality Control False
Traction Control False
Vehicle Stability Control System False
BHP False
RPM False
NM False
NM_RPM False
In [7]:
main.to_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//final_Preprocessed_datas2.csv")